@openclaw/discord 2026.5.7 → 2026.5.9-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{access-CHY9FK3X.js → access-Lk7H_e7y.js} +1 -1
- package/dist/action-runtime-api.js +1 -1
- package/dist/{allow-list-ek-1hMKN.js → allow-list-Dtho5Hww.js} +1 -1
- package/dist/api.js +19 -17
- package/dist/{approval-handler.runtime-BBZRYAGs.js → approval-handler.runtime-CqvKEAqO.js} +3 -3
- package/dist/{approval-native-CBdZsAR7.js → approval-native-W5mtrjYy.js} +2 -2
- package/dist/{approval-shared-Ck6TxKgo.js → approval-shared-D0FVj8b_.js} +1 -1
- package/dist/{audit-CCJ0h49k.js → audit-CrCPCVFG.js} +3 -3
- package/dist/{channel-UXGa9PGc.js → channel-CRTWY5R-.js} +53 -31
- package/dist/{channel-actions-Br29_1nE.js → channel-actions-BRgtLo8F.js} +27 -1
- package/dist/{channel-actions.runtime-ChmNUig1.js → channel-actions.runtime-gosyu06g.js} +5 -5
- package/dist/channel-config-api.js +1 -1
- package/dist/channel-plugin-api.js +1 -1
- package/dist/{channel.setup-D_xyQu_h.js → channel.setup-BQlXR1Gt.js} +3 -3
- package/dist/chunk-BDgKaWaV.js +179 -0
- package/dist/{components-D5LnN7ZQ.js → components-BWdIitIb.js} +2 -2
- package/dist/{config-schema-Cc953rAs.js → config-schema-D1DUqXws.js} +46 -1
- package/dist/contract-api.js +7 -7
- package/dist/{conversation-identity-DHhS0ez3.js → conversation-identity-6hcTVj3L.js} +1 -1
- package/dist/directory-contract-api.js +1 -1
- package/dist/{directory-live-DJ0V5asB.js → directory-live-D3kbmVAu.js} +3 -3
- package/dist/{discord-eZlimVfW.js → discord-Du6FnKAq.js} +30 -13
- package/dist/{doctor-Bo-yifB3.js → doctor-Ys2-q5bc.js} +3 -3
- package/dist/{doctor-contract-Bso46EOQ.js → doctor-contract-DSB2zzJA.js} +1 -1
- package/dist/doctor-contract-api.js +1 -1
- package/dist/{handle-action.guild-admin-sJiQymg8.js → handle-action.guild-admin-LM6kZ6zE.js} +6 -3
- package/dist/{inbound-context-CRylwjg0.js → inbound-context-BobVUBqo.js} +1 -1
- package/dist/{manager.runtime-Cug1PoeZ.js → manager.runtime-C_cPd048.js} +802 -134
- package/dist/{mentions-BPZUaFk7.js → mentions-CiPUID82.js} +1 -1
- package/dist/{message-handler-C9Ohf-ea.js → message-handler-C2ZQV7ZQ.js} +7 -7
- package/dist/{message-handler.preflight-BrvazsYn.js → message-handler.preflight-B5hN7RpX.js} +15 -15
- package/dist/{message-handler.process-CEnzuLiN.js → message-handler.process-Dns8D7t2.js} +82 -65
- package/dist/{message-utils-9kaGF59d.js → message-utils-B3uf0_3D.js} +2 -2
- package/dist/normalize-DBcng6RL.js +58 -0
- package/dist/{outbound-adapter-DNsTVJfH.js → outbound-adapter-jP0OgIyW.js} +78 -17
- package/dist/{outbound-session-route-DK9qkPgP.js → outbound-session-route-BaJRt05p.js} +1 -1
- package/dist/{preflight-audio-CRmUxxuM.js → preflight-audio-6J0vFNtu.js} +1 -1
- package/dist/{preview-streaming-Cc_oeIPP.js → preview-streaming-CXrFDP2T.js} +1 -0
- package/dist/{probe-E80IMT1X.js → probe-91lU5eh8.js} +1 -1
- package/dist/{probe.runtime-CMgUDax3.js → probe.runtime-DV37RDCU.js} +1 -1
- package/dist/{provider-CuOh6z_b.js → provider-D_QVXvp8.js} +53 -50
- package/dist/{provider-session.runtime-CCESIHVo.js → provider-session.runtime-CcPDguh6.js} +3 -3
- package/dist/provider.runtime-O7G03kik.js +2 -0
- package/dist/{reply-delivery-D9aKHtDH.js → reply-delivery-DKTZ6HkK.js} +10 -7
- package/dist/{resolve-allowlist-common-_e1cWOb3.js → resolve-allowlist-common-LhuVITjh.js} +2 -2
- package/dist/{resolve-channels-kyuvrXJg.js → resolve-channels-DjpVRJdT.js} +3 -3
- package/dist/{resolve-users-CAwh4EBq.js → resolve-users-DrZYxZSX.js} +2 -2
- package/dist/{route-resolution-BWErj5Cn.js → route-resolution-CYRPDKY4.js} +3 -3
- package/dist/{runtime-D8alY00g.js → runtime-BHAwVXEa.js} +7 -7
- package/dist/runtime-api.actions.js +2 -2
- package/dist/runtime-api.js +25 -25
- package/dist/runtime-api.lookup.js +6 -6
- package/dist/runtime-api.monitor--iuvLjPX.js +6 -0
- package/dist/runtime-api.monitor.js +7 -7
- package/dist/runtime-api.send.js +5 -5
- package/dist/runtime-api.threads.js +5 -5
- package/dist/runtime-setter-api.js +1 -1
- package/dist/secret-contract-api.js +1 -1
- package/dist/{security-audit-BtRd_VhN.js → security-audit-D8IaFuCm.js} +1 -1
- package/dist/security-audit-contract-api.js +1 -1
- package/dist/{security-audit.runtime-Dm1LW9KX.js → security-audit.runtime-DO1398sV.js} +1 -1
- package/dist/security-contract-api.js +1 -1
- package/dist/{send-8S_HKJpQ.js → send-C3peGbWO.js} +83 -56
- package/dist/{send.components-A42c_5tQ.js → send.components-Dsk3IzS_.js} +22 -14
- package/dist/{send.outbound-D3tonSz8.js → send.outbound-CXyInQ3c.js} +22 -11
- package/dist/send.receipt-DXimpUGs.js +35 -0
- package/dist/{send.shared-BQGiUPvZ.js → send.shared-BydEWvYg.js} +26 -7
- package/dist/{sender-identity-BGUfyvOC.js → sender-identity-DVJCrDxs.js} +1 -1
- package/dist/session-key-api.js +1 -1
- package/dist/setup-plugin-api.js +1 -1
- package/dist/{shared-BEW4H3bj.js → shared-BWD6uc0p.js} +8 -8
- package/dist/{shared-interactive-KgJjCqnB.js → shared-interactive-BZCU0ZJ8.js} +2 -2
- package/dist/{subagent-hooks-T0LPLh4H.js → subagent-hooks-DtbWOdAK.js} +1 -1
- package/dist/subagent-hooks-api.js +1 -1
- package/dist/{system-events-_fzSG--3.js → system-events-Dsc8MFYs.js} +2 -2
- package/dist/target-parsing-D-H7nnh2.js +51 -0
- package/dist/target-resolver-D7t8FMhO.js +85 -0
- package/dist/targets-DzAZIwg4.js +3 -0
- package/dist/test-api.js +4 -4
- package/dist/{thread-bindings-CMpZjP50.js → thread-bindings-B90pBWJg.js} +6 -6
- package/dist/{thread-bindings.discord-api-CwWGoyei.js → thread-bindings.discord-api-Dvf789Je.js} +7 -7
- package/dist/{thread-bindings.manager-BtxfLfWf.js → thread-bindings.manager-AwZ5Vble.js} +5 -5
- package/dist/{thread-bindings.session-updates-jcZSiRPI.js → thread-bindings.session-updates-B_AcsXSf.js} +1 -1
- package/dist/{threading-BMmpA2JR.js → threading-Dku_cGS8.js} +4 -4
- package/dist/timeouts.js +1 -1
- package/dist/{typing-Bw6NKWLZ.js → typing-BIjMmOZB.js} +2 -2
- package/openclaw.plugin.json +222 -1
- package/package.json +5 -5
- package/dist/provider.runtime-B68g3qLv.js +0 -2
- package/dist/runtime-api.monitor-DzkCxeBL.js +0 -6
- package/dist/target-resolver-DA84_xbt.js +0 -365
- package/dist/targets-FwL1BPTU.js +0 -2
- /package/dist/{agent-components.runtime-Dof1YMSz.js → agent-components.runtime-caj0h9y4.js} +0 -0
- /package/dist/{api-DzNBVTto.js → api-DaTujGTe.js} +0 -0
- /package/dist/{audit-core-CejGc3hO.js → audit-core-CIvZE94W.js} +0 -0
- /package/dist/{channel-access-DFIQqbYm.js → channel-access-CXAdcM95.js} +0 -0
- /package/dist/{directory-cache-D93eSrpB.js → directory-cache-D3l2v2_L.js} +0 -0
- /package/dist/{directory-config-DoETeOOx.js → directory-config-S2852QcC.js} +0 -0
- /package/dist/{doctor-shared-Cqvfgv9m.js → doctor-shared-DtOaqWzy.js} +0 -0
- /package/dist/{format-D8TsaXxW.js → format-XDPCvGK4.js} +0 -0
- /package/dist/{pluralkit-OFss_pIy.js → pluralkit-EXKKni07.js} +0 -0
- /package/dist/{preflight-audio.runtime-DPVbpZid.js → preflight-audio.runtime-3hiRefuj.js} +0 -0
- /package/dist/{runtime-K9RT6Egn.js → runtime-EoVRXYxX.js} +0 -0
- /package/dist/{secret-config-contract-5S9U9pjx.js → secret-config-contract-BA9jVaKx.js} +0 -0
- /package/dist/{security-contract-BE8rsdPq.js → security-contract-B7i5uqn6.js} +0 -0
- /package/dist/{security-doctor-DiilN216.js → security-doctor-2SGSPibT.js} +0 -0
- /package/dist/{session-contract-CuW9Nlxg.js → session-contract-DwjfF970.js} +0 -0
- /package/dist/{session-key-normalization-B5La-jFM.js → session-key-normalization-DvI2OlyS.js} +0 -0
- /package/dist/{thread-bindings.state-WU4duXKY.js → thread-bindings.state-CBwtFsbB.js} +0 -0
- /package/dist/{timeouts-CdsmBWWs.js → timeouts-BxAzVpSG.js} +0 -0
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
import { c as resolveDiscordAccountAllowFrom } from "./accounts-CaHGiVB4.js";
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { t as
|
|
7
|
-
import { t as
|
|
8
|
-
import {
|
|
2
|
+
import { t as parseDiscordTarget } from "./target-parsing-D-H7nnh2.js";
|
|
3
|
+
import { c as ResumedListener, s as ReadyListener, t as discord_exports } from "./discord-Du6FnKAq.js";
|
|
4
|
+
import { n as formatDiscordUserTag } from "./format-XDPCvGK4.js";
|
|
5
|
+
import { a as normalizeDiscordSlug, m as resolveDiscordOwnerAccess } from "./allow-list-Dtho5Hww.js";
|
|
6
|
+
import { t as formatMention } from "./mentions-CiPUID82.js";
|
|
7
|
+
import { t as getDiscordRuntime } from "./runtime-EoVRXYxX.js";
|
|
8
|
+
import { t as buildDiscordGroupSystemPrompt } from "./inbound-context-BobVUBqo.js";
|
|
9
|
+
import { n as resolveDiscordVoiceEnabled, t as authorizeDiscordVoiceIngress } from "./access-Lk7H_e7y.js";
|
|
9
10
|
import { createRequire } from "node:module";
|
|
10
|
-
import { normalizeOptionalString, stripInlineDirectiveTagsForDisplay } from "openclaw/plugin-sdk/text-runtime";
|
|
11
|
+
import { escapeRegExp, normalizeOptionalString, stripInlineDirectiveTagsForDisplay } from "openclaw/plugin-sdk/text-runtime";
|
|
11
12
|
import { resolveAgentRoute } from "openclaw/plugin-sdk/routing";
|
|
12
|
-
import { randomUUID } from "node:crypto";
|
|
13
|
-
import fs from "node:fs/promises";
|
|
14
13
|
import path from "node:path";
|
|
15
14
|
import { createSubsystemLogger, logVerbose, shouldLogVerbose } from "openclaw/plugin-sdk/runtime-env";
|
|
16
|
-
import
|
|
15
|
+
import fs from "node:fs/promises";
|
|
16
|
+
import { resolvePreferredOpenClawTmpDir, tempWorkspace } from "openclaw/plugin-sdk/temp-path";
|
|
17
17
|
import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
|
|
18
18
|
import { agentCommandFromIngress, getTtsProvider, resolveAgentDir, resolveTtsConfig, resolveTtsPrefsPath } from "openclaw/plugin-sdk/agent-runtime";
|
|
19
|
+
import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ, buildRealtimeVoiceAgentConsultChatMessage, buildRealtimeVoiceAgentConsultPolicyInstructions, buildRealtimeVoiceAgentConsultWorkingResponse, createRealtimeVoiceAgentTalkbackQueue, createRealtimeVoiceBridgeSession, resamplePcm, resolveConfiguredRealtimeVoiceProvider, resolveRealtimeVoiceAgentConsultToolPolicy, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow } from "openclaw/plugin-sdk/realtime-voice";
|
|
20
|
+
import { PassThrough, Readable } from "node:stream";
|
|
19
21
|
import { parseTtsDirectives } from "openclaw/plugin-sdk/speech";
|
|
20
22
|
//#region extensions/discord/src/voice/audio.ts
|
|
21
23
|
const require = createRequire(import.meta.url);
|
|
@@ -98,17 +100,58 @@ async function decodeOpusStream(stream, params) {
|
|
|
98
100
|
}
|
|
99
101
|
return chunks.length > 0 ? Buffer.concat(chunks) : Buffer.alloc(0);
|
|
100
102
|
}
|
|
103
|
+
/**
 * Decodes an Opus packet stream incrementally and forwards every decoded
 * chunk to the caller instead of buffering the whole capture.
 *
 * @param {AsyncIterable<unknown>} stream - Source of Opus packets; anything that is
 *   not a non-empty Buffer is skipped.
 * @param {object} params
 * @param {Function} params.onWarn - Forwarded to `createOpusDecoder`.
 * @param {Function} params.onVerbose - Receives the name of the selected decoder.
 * @param {Function} params.onChunk - Receives each decoded chunk as a Buffer.
 * @returns {Promise<void>} Resolves when the stream ends, when no decoder is
 *   available, or after a failure has been (verbosely) logged.
 */
async function decodeOpusStreamChunks(stream, params) {
  const opus = createOpusDecoder({ onWarn: params.onWarn });
  if (!opus) {
    return;
  }
  params.onVerbose(`opus decoder: ${opus.name}`);
  try {
    for await (const packet of stream) {
      const isUsablePacket = Boolean(packet) && packet instanceof Buffer && packet.length !== 0;
      if (!isUsablePacket) {
        continue;
      }
      const pcm = opus.decoder.decode(packet);
      if (pcm && pcm.length > 0) {
        params.onChunk(Buffer.from(pcm));
      }
    }
  } catch (err) {
    // Best effort: a failure ends the loop and is only reported in verbose mode.
    if (shouldLogVerbose()) {
      logVerbose(`discord voice: opus decode failed: ${formatErrorMessage(err)}`);
    }
  }
}
|
|
117
|
+
/**
 * Down-mixes interleaved 16-bit little-endian stereo PCM to mono by averaging
 * the two channels of each frame, then hands the mono buffer to `resamplePcm`
 * to go from `SAMPLE_RATE` to 24 kHz.
 *
 * @param {Buffer} pcm - Interleaved stereo PCM; trailing bytes that do not form a
 *   complete 4-byte frame are ignored.
 * @returns {Buffer} The resampled mono PCM, or an empty Buffer when there is no
 *   complete frame.
 */
function convertDiscordPcm48kStereoToRealtimePcm24kMono(pcm) {
  const totalFrames = Math.floor(pcm.length / 4);
  if (totalFrames === 0) {
    return Buffer.alloc(0);
  }
  const downmixed = Buffer.alloc(totalFrames * 2);
  let readAt = 0;
  for (let writeAt = 0; writeAt < downmixed.length; writeAt += 2) {
    const leftSample = pcm.readInt16LE(readAt);
    const rightSample = pcm.readInt16LE(readAt + 2);
    downmixed.writeInt16LE(Math.round((leftSample + rightSample) / 2), writeAt);
    readAt += 4;
  }
  return resamplePcm(downmixed, SAMPLE_RATE, 24e3);
}
|
|
129
|
+
/**
 * Resamples 16-bit little-endian mono PCM from 24 kHz to `SAMPLE_RATE` via
 * `resamplePcm`, then duplicates every sample into both channels of an
 * interleaved stereo buffer.
 *
 * @param {Buffer} pcm - Mono PCM at 24 kHz.
 * @returns {Buffer} Interleaved stereo PCM, or an empty Buffer when the resampled
 *   data holds no complete sample.
 */
function convertRealtimePcm24kMonoToDiscordPcm48kStereo(pcm) {
  const upsampled = resamplePcm(pcm, 24e3, SAMPLE_RATE);
  const monoSampleCount = Math.floor(upsampled.length / 2);
  if (monoSampleCount === 0) {
    return Buffer.alloc(0);
  }
  const interleaved = Buffer.alloc(monoSampleCount * 4);
  let writeAt = 0;
  for (let readAt = 0; readAt < monoSampleCount * 2; readAt += 2) {
    const value = upsampled.readInt16LE(readAt);
    // Same value on the left and right channel.
    interleaved.writeInt16LE(value, writeAt);
    interleaved.writeInt16LE(value, writeAt + 2);
    writeAt += 4;
  }
  return interleaved;
}
|
|
101
142
|
/**
 * Estimates how long a PCM buffer plays, based on the module-level
 * `BIT_DEPTH`, `CHANNELS` and `SAMPLE_RATE` settings.
 *
 * @param {Buffer} pcm - Raw PCM data.
 * @returns {number} Duration in seconds, or 0 when the computed frame size is
 *   not positive.
 */
function estimateDurationSeconds(pcm) {
  // Bytes occupied by one sample across all channels.
  const frameBytes = (BIT_DEPTH / 8) * CHANNELS;
  return frameBytes <= 0 ? 0 : pcm.length / (frameBytes * SAMPLE_RATE);
}
|
|
106
147
|
async function writeVoiceWavFile(pcm) {
|
|
107
|
-
const
|
|
108
|
-
|
|
148
|
+
const workspace = await tempWorkspace({
|
|
149
|
+
rootDir: resolvePreferredOpenClawTmpDir(),
|
|
150
|
+
prefix: "discord-voice-"
|
|
151
|
+
});
|
|
109
152
|
const wav = buildWavBuffer(pcm);
|
|
110
|
-
await
|
|
111
|
-
scheduleTempCleanup(
|
|
153
|
+
const filePath = await workspace.write("segment.wav", wav);
|
|
154
|
+
scheduleTempCleanup(workspace.dir);
|
|
112
155
|
return {
|
|
113
156
|
path: filePath,
|
|
114
157
|
durationSeconds: estimateDurationSeconds(pcm)
|
|
@@ -193,6 +236,447 @@ function scheduleVoiceCaptureFinalize(params) {
|
|
|
193
236
|
});
|
|
194
237
|
return true;
|
|
195
238
|
}
|
|
239
|
+
/**
 * Resolves who is speaking in a voice channel and whether their audio may be
 * handled, returning the context an agent turn needs.
 *
 * Side effect: when `entry.guildName` is empty it is fetched through
 * `params.fetchGuildName` and stored back on `entry`.
 *
 * @param {object} params
 * @param {object} params.entry - Voice session entry (guild/channel ids and names).
 * @param {string} params.userId - Id of the speaking user.
 * @param {object} params.cfg - Forwarded to `authorizeDiscordVoiceIngress`.
 * @param {object} params.discordConfig - Forwarded to `authorizeDiscordVoiceIngress`.
 * @param {unknown} params.ownerAllowFrom - Forwarded to `authorizeDiscordVoiceIngress`.
 * @param {Function} params.fetchGuildName - Looks up a guild name by guild id.
 * @param {object} params.speakerContext - Provides `resolveContext` and `resolveIdentity`.
 * @returns {Promise<{extraSystemPrompt: unknown, senderIsOwner: unknown, speakerLabel: unknown} | null>}
 *   `null` when the authorization result is not `ok`.
 */
async function resolveDiscordVoiceIngressContext(params) {
  const { entry, userId } = params;
  if (!entry.guildName) {
    entry.guildName = await params.fetchGuildName(entry.guildId);
  }

  // Resolved one after the other, in this order.
  const speaker = await params.speakerContext.resolveContext(entry.guildId, userId);
  const identity = await params.speakerContext.resolveIdentity(entry.guildId, userId);

  const { guildName, guildId, channelId, channelName } = entry;
  const access = await authorizeDiscordVoiceIngress({
    cfg: params.cfg,
    discordConfig: params.discordConfig,
    guildName,
    guildId,
    channelId,
    channelName,
    channelSlug: channelName ? normalizeDiscordSlug(channelName) : "",
    channelLabel: formatMention({ channelId }),
    memberRoleIds: identity.memberRoleIds,
    ownerAllowFrom: params.ownerAllowFrom,
    sender: {
      id: identity.id,
      name: identity.name,
      tag: identity.tag
    }
  });
  if (!access.ok) {
    return null;
  }

  const extraSystemPrompt = buildDiscordGroupSystemPrompt(access.channelConfig);
  return {
    extraSystemPrompt,
    senderIsOwner: speaker.senderIsOwner,
    speakerLabel: speaker.label
  };
}
|
|
268
|
+
/**
 * Runs one agent turn for a Discord voice message and collects the text that
 * should be spoken back.
 *
 * @param {object} params
 * @param {object} [params.context] - Pre-resolved ingress context; when absent it is
 *   resolved through `resolveDiscordVoiceIngressContext`.
 * @param {object} params.entry - Voice session entry; `entry.route` supplies the
 *   session key and agent id.
 * @param {string} params.userId - Id of the speaking user.
 * @param {object} params.cfg
 * @param {object} params.discordConfig - `voice.model`, when set, overrides the model.
 * @param {unknown} params.ownerAllowFrom
 * @param {Function} params.fetchGuildName
 * @param {object} params.speakerContext
 * @param {string} params.message - Message handed to the agent.
 * @param {unknown} params.toolsAllow - Forwarded to the agent command.
 * @param {unknown} params.runtime - Second argument of `agentCommandFromIngress`.
 * @returns {Promise<{context: object, text: string} | null>} `null` when no ingress
 *   context could be resolved; otherwise the context plus the non-blank payload
 *   texts joined with newlines.
 */
async function runDiscordVoiceAgentTurn(params) {
  const context = params.context ?? await resolveDiscordVoiceIngressContext({
    entry: params.entry,
    userId: params.userId,
    cfg: params.cfg,
    discordConfig: params.discordConfig,
    ownerAllowFrom: params.ownerAllowFrom,
    fetchGuildName: params.fetchGuildName,
    speakerContext: params.speakerContext
  });
  if (!context) {
    return null;
  }

  const voiceModel = normalizeOptionalString(params.discordConfig.voice?.model);
  const agentRequest = {
    message: params.message,
    sessionKey: params.entry.route.sessionKey,
    agentId: params.entry.route.agentId,
    messageChannel: "discord",
    messageProvider: "discord-voice",
    extraSystemPrompt: context.extraSystemPrompt,
    senderIsOwner: context.senderIsOwner,
    allowModelOverride: Boolean(voiceModel),
    model: voiceModel,
    toolsAllow: params.toolsAllow,
    // The reply is spoken by the voice pipeline, not delivered as a message.
    deliver: false
  };
  const agentResult = await agentCommandFromIngress(agentRequest, params.runtime);

  const payloads = agentResult.payloads ?? [];
  const spokenParts = payloads
    .map((payload) => payload.text)
    .filter((text) => typeof text === "string" && text.trim());
  return {
    context,
    text: spokenParts.join("\n").trim()
  };
}
|
|
297
|
+
//#endregion
|
|
298
|
+
//#region extensions/discord/src/voice/prompt.ts
|
|
299
|
+
/** Individual lines of the spoken-output contract, in the order they are emitted. */
const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT_LINES = Object.freeze([
  "You are OpenClaw's Discord voice interface in a live voice channel.",
  "Discord voice reply requirements:",
  "- Return only the concise text that should be spoken aloud in the voice channel.",
  "- Treat the transcript as speech-to-text from a live conversation; repair obvious transcription artifacts and ignore repeated partial fragments caused by voice buffering.",
  "- If the transcript is garbled, incomplete, or missing the user's intent, ask one brief clarifying question instead of guessing.",
  "- If the request needs deeper reasoning, current information, or tools, use the available tools before answering.",
  "- Do not call the tts tool; Discord voice will synthesize and play the returned text.",
  "- Do not reply with NO_REPLY unless no spoken response is appropriate.",
  "- Keep the response brief, natural, and conversational. Prefer one to three short sentences.",
  "- Avoid markdown tables, code fences, citations, and visual formatting unless the user explicitly asks for something that cannot be spoken naturally."
]);

/** Newline-joined prompt preamble describing how replies for Discord voice must be phrased. */
const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT_LINES.join("\n");
|
|
311
|
+
/**
 * Builds the agent prompt for a voice transcript: the spoken-output contract,
 * a blank line, then the trimmed transcript (preceded by a speaker header when
 * a non-blank label is given).
 *
 * @param {string} transcript - Transcribed speech; surrounding whitespace is removed.
 * @param {string} [speakerLabel] - Display label of the speaker; blank or missing
 *   labels produce no header.
 * @returns {string} The assembled prompt.
 */
function formatVoiceIngressPrompt(transcript, speakerLabel) {
  const spokenText = transcript.trim();
  const speaker = speakerLabel?.trim();
  let transcriptSection = spokenText;
  if (speaker) {
    transcriptSection = [`Voice transcript from speaker "${speaker}":`, spokenText].join("\n");
  }
  return [DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT, transcriptSection].join("\n\n");
}
|
|
316
|
+
//#endregion
|
|
317
|
+
//#region extensions/discord/src/voice/sdk-runtime.ts
|
|
318
|
+
/** Module-level cache for the lazily required `@discordjs/voice` module. */
let cachedDiscordVoiceSdk = null;

/**
 * Returns the `@discordjs/voice` module, requiring it on first use and reusing
 * the cached module on later calls.
 *
 * @returns {object} The loaded `@discordjs/voice` module.
 */
function loadDiscordVoiceSdk() {
  if (!cachedDiscordVoiceSdk) {
    const requireFromHere = createRequire(import.meta.url);
    cachedDiscordVoiceSdk = requireFromHere("@discordjs/voice");
  }
  return cachedDiscordVoiceSdk;
}
|
|
324
|
+
// Voice timing defaults; every value is in milliseconds (per the `_MS` suffix).
// NOTE(review): the consumers of these constants are outside this view — confirm
// intent against the call sites before changing a value.
const CAPTURE_FINALIZE_GRACE_MS = 2_500;
const VOICE_CONNECT_READY_TIMEOUT_MS = 30_000;
const VOICE_RECONNECT_GRACE_MS = 15_000;
const PLAYBACK_READY_TIMEOUT_MS = 60_000;
const SPEAKING_READY_TIMEOUT_MS = 60_000;
|
|
329
|
+
/**
 * Normalizes a configured timeout to a whole, positive number of milliseconds.
 *
 * The value is floored before the positivity check so that a positive value
 * below one millisecond (e.g. 0.5) falls back instead of becoming a 0 ms timeout.
 *
 * @param {unknown} value - Configured timeout; anything that is not a finite number
 *   of at least 1 is rejected.
 * @param {number} fallbackMs - Returned unchanged when `value` is rejected.
 * @returns {number} `Math.floor(value)` when usable, otherwise `fallbackMs`.
 */
function resolveVoiceTimeoutMs(value, fallbackMs) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return fallbackMs;
  }
  const wholeMs = Math.floor(value);
  return wholeMs > 0 ? wholeMs : fallbackMs;
}
|
|
333
|
+
/**
 * Writes a verbose log line prefixed with `discord voice: `.
 *
 * @param {string} message - Text appended after the prefix.
 * @returns {void}
 */
function logVoiceVerbose(message) {
  const line = `discord voice: ${message}`;
  logVerbose(line);
}
|
|
336
|
+
/**
 * Tells whether a Discord channel type is a guild voice or guild stage-voice channel.
 *
 * @param {unknown} type - Channel type value to compare against `ChannelType`.
 * @returns {boolean} `true` for `GuildVoice` or `GuildStageVoice`.
 */
function isVoiceChannel(type) {
  const { GuildVoice, GuildStageVoice } = discord_exports.ChannelType;
  if (type === GuildVoice) {
    return true;
  }
  return type === GuildStageVoice;
}
|
|
339
|
+
//#endregion
|
|
340
|
+
//#region extensions/discord/src/voice/realtime.ts
|
|
341
|
+
const logger$2 = createSubsystemLogger("discord/voice");
|
|
342
|
+
/** Default debounce (ms) for the realtime talkback queue when none is configured. */
const DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS = 350;

/** Fallback text handed to the realtime talkback queue. */
const DISCORD_REALTIME_FALLBACK_TEXT = "I hit an error while checking that. Please try again.";

// NOTE(review): presumably caps the number of pending speaker turns that are kept —
// the pruning code is outside this view; confirm.
const DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT = 32;

/** Maximum number of characters kept by `formatRealtimeLogPreview` before "..." is appended. */
const DISCORD_REALTIME_LOG_PREVIEW_CHARS = 500;

// NOTE(review): default for the minimum barge-in audio end (ms) by its name; the
// resolver that reads it is outside this view — confirm.
const DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
|
|
347
|
+
/**
 * Produces a single-line preview of `text` for log output: whitespace runs are
 * collapsed to one space, the ends are trimmed, and anything longer than
 * `DISCORD_REALTIME_LOG_PREVIEW_CHARS` is cut and suffixed with "...".
 *
 * @param {string} text - Text to preview.
 * @returns {string} The collapsed, possibly truncated preview.
 */
function formatRealtimeLogPreview(text) {
  const collapsed = text.trim().split(/\s+/).join(" ");
  const limit = DISCORD_REALTIME_LOG_PREVIEW_CHARS;
  if (collapsed.length > limit) {
    return `${collapsed.slice(0, limit)}...`;
  }
  return collapsed;
}
|
|
352
|
+
/**
 * Maps a realtime bridge event to an info-level log line when the event is part
 * of an interruption (cancel / truncate) exchange.
 *
 * @param {{direction: string, type: string, detail?: string}} event - Bridge event.
 * @returns {string | undefined} The log line, or `undefined` for events that are
 *   not interruption related.
 */
function formatRealtimeInterruptionLog(event) {
  // Appends "<direction>:<type>" plus the optional detail to a fixed prefix.
  const describe = (prefix) => {
    const suffix = event.detail ? ` ${event.detail}` : "";
    return `${prefix} ${event.direction}:${event.type}${suffix}`;
  };

  switch (event.direction) {
    case "client":
      switch (event.type) {
        case "response.cancel":
          return describe("discord voice: realtime model interrupt requested");
        case "conversation.item.truncate.skipped":
          return describe("discord voice: realtime model interrupt ignored");
        case "conversation.item.truncate":
          return describe("discord voice: realtime model audio truncated");
        default:
          return undefined;
      }
    case "server":
      switch (event.type) {
        case "response.cancelled":
          return describe("discord voice: realtime model interrupt confirmed");
        case "response.done":
          return event.detail?.includes("status=cancelled")
            ? describe("discord voice: realtime model interrupt confirmed")
            : undefined;
        case "error":
          // The cancel arrived after the response had already finished.
          return event.detail === "Cancellation failed: no active response found"
            ? describe("discord voice: realtime model interrupt raced")
            : undefined;
        default:
          return undefined;
      }
    default:
      return undefined;
  }
}
|
|
365
|
+
/**
 * Reads a string setting from a provider config object.
 *
 * Tolerates a missing config (`null`/`undefined`), matching
 * `readProviderConfigBoolean`, instead of throwing on the property access.
 *
 * @param {object | null | undefined} config - Provider config to read from.
 * @param {string} key - Name of the setting.
 * @returns {string | undefined} The trimmed value, or `undefined` when the config is
 *   missing, the value is not a string, or it is blank.
 */
function readProviderConfigString(config, key) {
  const value = config?.[key];
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed === "" ? undefined : trimmed;
}
|
|
369
|
+
/**
 * Reads a boolean setting from a provider config object.
 *
 * @param {object | null | undefined} config - Provider config; may be missing.
 * @param {string} key - Name of the setting.
 * @returns {boolean | undefined} The value when it is a real boolean, otherwise
 *   `undefined`.
 */
function readProviderConfigBoolean(config, key) {
  const candidate = config?.[key];
  if (typeof candidate === "boolean") {
    return candidate;
  }
  return undefined;
}
|
|
373
|
+
/**
 * Picks the Discord voice mode from the voice config.
 *
 * @param {{mode?: unknown} | null | undefined} voice - Voice config; may be missing.
 * @returns {"stt-tts" | "bidi" | "agent-proxy"} The configured mode when it is
 *   `"stt-tts"` or `"bidi"`; `"agent-proxy"` for everything else.
 */
function resolveDiscordVoiceMode(voice) {
  const requested = voice?.mode;
  switch (requested) {
    case "stt-tts":
    case "bidi":
      return requested;
    default:
      return "agent-proxy";
  }
}
|
|
378
|
+
/**
 * Tells whether a voice mode is one of the realtime modes.
 *
 * @param {unknown} mode - Voice mode to check.
 * @returns {boolean} `true` for `"agent-proxy"` and `"bidi"`.
 */
function isDiscordRealtimeVoiceMode(mode) {
  switch (mode) {
    case "agent-proxy":
    case "bidi":
      return true;
    default:
      return false;
  }
}
|
|
381
|
+
/**
 * Tells whether a voice mode is the agent-proxy mode.
 *
 * @param {unknown} mode - Voice mode to check.
 * @returns {boolean} `true` only for `"agent-proxy"`.
 */
function isDiscordAgentProxyVoiceMode(mode) {
  const isAgentProxy = mode === "agent-proxy";
  return isAgentProxy;
}
|
|
384
|
+
/**
 * Resolves whether the realtime provider should interrupt its response when
 * input audio arrives.
 *
 * @param {object} params
 * @param {object} [params.realtimeConfig] - Realtime voice config; its
 *   `providers[providerId].interruptResponseOnInputAudio` is consulted.
 * @param {string} params.providerId - Id of the provider whose config is read.
 * @returns {boolean} The configured boolean, or `true` when none is configured.
 */
function resolveDiscordRealtimeInterruptResponseOnInputAudio(params) {
  const { realtimeConfig, providerId } = params;
  const providerSettings = realtimeConfig?.providers?.[providerId];
  const configured = readProviderConfigBoolean(providerSettings, "interruptResponseOnInputAudio");
  return configured ?? true;
}
|
|
388
|
+
/**
 * Resolves whether barge-in is enabled for the realtime voice session.
 *
 * @param {object} params
 * @param {object} [params.realtimeConfig] - Realtime voice config; a boolean
 *   `bargeIn` wins over everything else.
 * @param {string} params.providerId - Provider id, used by the fallback lookup.
 * @returns {boolean} The explicit `bargeIn` boolean when set, otherwise the result
 *   of `resolveDiscordRealtimeInterruptResponseOnInputAudio(params)`.
 */
function resolveDiscordRealtimeBargeIn(params) {
  const explicit = params.realtimeConfig?.bargeIn;
  return typeof explicit === "boolean"
    ? explicit
    : resolveDiscordRealtimeInterruptResponseOnInputAudio(params);
}
|
|
393
|
+
/**
 * Builds the user message that asks the realtime model to read an answer aloud
 * verbatim. The answer is embedded JSON-encoded on its own `Answer:` line.
 *
 * @param {string} text - Answer to be spoken.
 * @returns {string} Two-line instruction message.
 */
function buildDiscordSpeakExactUserMessage(text) {
  const instruction = "Speak this exact OpenClaw answer to the Discord voice channel, without adding, removing, or rephrasing words.";
  const quotedAnswer = JSON.stringify(text);
  return `${instruction}\n${`Answer: ${quotedAnswer}`}`;
}
|
|
396
|
+
var DiscordRealtimeVoiceSession = class {
|
|
397
|
+
constructor(params) {
|
|
398
|
+
this.params = params;
|
|
399
|
+
this.bridge = null;
|
|
400
|
+
this.outputStream = null;
|
|
401
|
+
this.stopped = false;
|
|
402
|
+
this.consultToolPolicy = "safe-read-only";
|
|
403
|
+
this.pendingSpeakerTurns = [];
|
|
404
|
+
this.playerIdleHandler = () => {
|
|
405
|
+
this.resetOutputStream();
|
|
406
|
+
};
|
|
407
|
+
this.talkback = createRealtimeVoiceAgentTalkbackQueue({
|
|
408
|
+
debounceMs: this.realtimeConfig?.debounceMs ?? DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS,
|
|
409
|
+
isStopped: () => this.stopped,
|
|
410
|
+
logger: logger$2,
|
|
411
|
+
logPrefix: "[discord] realtime agent",
|
|
412
|
+
responseStyle: "Brief, natural spoken answer for a Discord voice channel.",
|
|
413
|
+
fallbackText: DISCORD_REALTIME_FALLBACK_TEXT,
|
|
414
|
+
consult: async ({ question, responseStyle, metadata }) => {
|
|
415
|
+
const context = isDiscordRealtimeSpeakerContext(metadata) ? metadata : void 0;
|
|
416
|
+
return { text: await this.runAgentTurn({
|
|
417
|
+
context,
|
|
418
|
+
message: formatVoiceIngressPrompt([question, responseStyle ? `Spoken style: ${responseStyle}` : void 0].filter(Boolean).join("\n\n"), context?.speakerLabel ?? "Discord voice speaker")
|
|
419
|
+
}) };
|
|
420
|
+
},
|
|
421
|
+
deliver: (text) => this.bridge?.sendUserMessage(buildDiscordSpeakExactUserMessage(text))
|
|
422
|
+
});
|
|
423
|
+
}
|
|
424
|
+
async connect() {
|
|
425
|
+
const resolved = resolveConfiguredRealtimeVoiceProvider({
|
|
426
|
+
configuredProviderId: this.realtimeConfig?.provider,
|
|
427
|
+
providerConfigs: buildProviderConfigs(this.realtimeConfig),
|
|
428
|
+
providerConfigOverrides: buildProviderConfigOverrides(this.realtimeConfig),
|
|
429
|
+
cfg: this.params.cfg,
|
|
430
|
+
defaultModel: this.realtimeConfig?.model,
|
|
431
|
+
noRegisteredProviderMessage: "No configured realtime voice provider registered"
|
|
432
|
+
});
|
|
433
|
+
const toolPolicy = resolveRealtimeVoiceAgentConsultToolPolicy(this.realtimeConfig?.toolPolicy, "safe-read-only");
|
|
434
|
+
this.consultToolPolicy = toolPolicy;
|
|
435
|
+
this.consultToolsAllow = resolveRealtimeVoiceAgentConsultToolsAllow(toolPolicy);
|
|
436
|
+
const consultPolicy = this.realtimeConfig?.consultPolicy ?? "auto";
|
|
437
|
+
const interruptResponseOnInputAudio = resolveDiscordRealtimeInterruptResponseOnInputAudio({
|
|
438
|
+
realtimeConfig: this.realtimeConfig,
|
|
439
|
+
providerId: resolved.provider.id
|
|
440
|
+
});
|
|
441
|
+
const instructions = buildDiscordRealtimeInstructions({
|
|
442
|
+
mode: this.params.mode,
|
|
443
|
+
instructions: this.realtimeConfig?.instructions,
|
|
444
|
+
toolPolicy,
|
|
445
|
+
consultPolicy
|
|
446
|
+
});
|
|
447
|
+
this.bridge = createRealtimeVoiceBridgeSession({
|
|
448
|
+
provider: resolved.provider,
|
|
449
|
+
providerConfig: resolved.providerConfig,
|
|
450
|
+
audioFormat: REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ,
|
|
451
|
+
instructions,
|
|
452
|
+
autoRespondToAudio: this.params.mode === "bidi",
|
|
453
|
+
interruptResponseOnInputAudio,
|
|
454
|
+
markStrategy: "ack-immediately",
|
|
455
|
+
tools: this.params.mode === "bidi" ? resolveRealtimeVoiceAgentConsultTools(toolPolicy) : [],
|
|
456
|
+
audioSink: {
|
|
457
|
+
isOpen: () => !this.stopped,
|
|
458
|
+
sendAudio: (audio) => this.sendOutputAudio(audio),
|
|
459
|
+
clearAudio: () => this.clearOutputAudio()
|
|
460
|
+
},
|
|
461
|
+
onTranscript: (role, text, isFinal) => {
|
|
462
|
+
if (isFinal && text.trim()) logger$2.info(`discord voice: realtime ${role} transcript (${text.length} chars): ${formatRealtimeLogPreview(text)}`);
|
|
463
|
+
if (!isFinal || role !== "user" || !isDiscordAgentProxyVoiceMode(this.params.mode)) return;
|
|
464
|
+
this.talkback.enqueue(text, this.consumePendingSpeakerContext());
|
|
465
|
+
},
|
|
466
|
+
onToolCall: (event, session) => this.handleToolCall(event, session),
|
|
467
|
+
onEvent: (event) => {
|
|
468
|
+
const detail = event.detail ? ` ${event.detail}` : "";
|
|
469
|
+
logVoiceVerbose(`realtime ${event.direction}:${event.type}${detail}`);
|
|
470
|
+
const interruptionLog = formatRealtimeInterruptionLog(event);
|
|
471
|
+
if (interruptionLog) logger$2.info(interruptionLog);
|
|
472
|
+
},
|
|
473
|
+
onError: (error) => logger$2.warn(`discord voice: realtime error: ${formatErrorMessage(error)}`),
|
|
474
|
+
onClose: (reason) => logVoiceVerbose(`realtime closed: ${reason}`)
|
|
475
|
+
});
|
|
476
|
+
const resolvedModel = readProviderConfigString(resolved.providerConfig, "model") ?? resolved.provider.defaultModel;
|
|
477
|
+
const resolvedVoice = readProviderConfigString(resolved.providerConfig, "voice");
|
|
478
|
+
logger$2.info(`discord voice: realtime bridge starting mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"} consultPolicy=${consultPolicy} toolPolicy=${toolPolicy} autoRespond=${this.params.mode === "bidi"} interruptResponse=${interruptResponseOnInputAudio} bargeIn=${resolveDiscordRealtimeBargeIn({
|
|
479
|
+
realtimeConfig: this.realtimeConfig,
|
|
480
|
+
providerId: resolved.provider.id
|
|
481
|
+
})} minBargeInAudioEndMs=${resolveDiscordRealtimeMinBargeInAudioEndMs(this.realtimeConfig)}`);
|
|
482
|
+
const voiceSdk = loadDiscordVoiceSdk();
|
|
483
|
+
this.params.entry.player.on(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
|
|
484
|
+
await this.bridge.connect();
|
|
485
|
+
logger$2.info(`discord voice: realtime bridge ready mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"}`);
|
|
486
|
+
}
|
|
487
|
+
close() {
|
|
488
|
+
this.stopped = true;
|
|
489
|
+
this.talkback.close();
|
|
490
|
+
this.pendingSpeakerTurns.length = 0;
|
|
491
|
+
this.clearOutputAudio();
|
|
492
|
+
this.bridge?.close();
|
|
493
|
+
this.bridge = null;
|
|
494
|
+
const voiceSdk = loadDiscordVoiceSdk();
|
|
495
|
+
this.params.entry.player.off(voiceSdk.AudioPlayerStatus.Idle, this.playerIdleHandler);
|
|
496
|
+
}
|
|
497
|
+
beginSpeakerTurn(context, userId) {
|
|
498
|
+
const turn = {
|
|
499
|
+
context: {
|
|
500
|
+
...context,
|
|
501
|
+
userId
|
|
502
|
+
},
|
|
503
|
+
hasAudio: false,
|
|
504
|
+
interruptedPlayback: false,
|
|
505
|
+
closed: false
|
|
506
|
+
};
|
|
507
|
+
this.pendingSpeakerTurns.push(turn);
|
|
508
|
+
this.prunePendingSpeakerTurns();
|
|
509
|
+
return {
|
|
510
|
+
sendInputAudio: (discordPcm48kStereo) => this.sendInputAudioForTurn(turn, discordPcm48kStereo),
|
|
511
|
+
close: () => {
|
|
512
|
+
turn.closed = true;
|
|
513
|
+
this.prunePendingSpeakerTurns();
|
|
514
|
+
}
|
|
515
|
+
};
|
|
516
|
+
}
|
|
517
|
+
sendInputAudioForTurn(turn, discordPcm48kStereo) {
|
|
518
|
+
if (!this.bridge || this.stopped) return;
|
|
519
|
+
turn.hasAudio = true;
|
|
520
|
+
const realtimePcm = convertDiscordPcm48kStereoToRealtimePcm24kMono(discordPcm48kStereo);
|
|
521
|
+
if (realtimePcm.length > 0) {
|
|
522
|
+
if (!turn.interruptedPlayback && this.isBargeInEnabled()) {
|
|
523
|
+
turn.interruptedPlayback = true;
|
|
524
|
+
logVoiceVerbose(`realtime barge-in from active speaker audio: guild ${this.params.entry.guildId} channel ${this.params.entry.channelId} user ${turn.context.userId}`);
|
|
525
|
+
this.handleBargeIn();
|
|
526
|
+
}
|
|
527
|
+
this.bridge.sendAudio(realtimePcm);
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
handleBargeIn() {
|
|
531
|
+
if (!this.isBargeInEnabled()) return;
|
|
532
|
+
this.bridge?.handleBargeIn({ audioPlaybackActive: true });
|
|
533
|
+
}
|
|
534
|
+
isBargeInEnabled() {
|
|
535
|
+
const providerId = this.realtimeConfig?.provider ?? "openai";
|
|
536
|
+
return resolveDiscordRealtimeBargeIn({
|
|
537
|
+
realtimeConfig: this.realtimeConfig,
|
|
538
|
+
providerId
|
|
539
|
+
});
|
|
540
|
+
}
|
|
541
|
+
get realtimeConfig() {
|
|
542
|
+
return this.params.discordConfig.voice?.realtime;
|
|
543
|
+
}
|
|
544
|
+
sendOutputAudio(realtimePcm24kMono) {
|
|
545
|
+
const discordPcm = convertRealtimePcm24kMonoToDiscordPcm48kStereo(realtimePcm24kMono);
|
|
546
|
+
if (discordPcm.length === 0) return;
|
|
547
|
+
this.ensureOutputStream().write(discordPcm);
|
|
548
|
+
}
|
|
549
|
+
ensureOutputStream() {
|
|
550
|
+
if (this.outputStream && !this.outputStream.destroyed) return this.outputStream;
|
|
551
|
+
const voiceSdk = loadDiscordVoiceSdk();
|
|
552
|
+
const stream = new PassThrough();
|
|
553
|
+
this.outputStream = stream;
|
|
554
|
+
stream.once("close", () => {
|
|
555
|
+
if (this.outputStream === stream) this.outputStream = null;
|
|
556
|
+
});
|
|
557
|
+
const resource = voiceSdk.createAudioResource(stream, { inputType: voiceSdk.StreamType.Raw });
|
|
558
|
+
this.params.entry.player.play(resource);
|
|
559
|
+
const realtimeConfig = this.realtimeConfig;
|
|
560
|
+
logger$2.info(`discord voice: realtime audio playback started guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} mode=${this.params.mode} model=${realtimeConfig?.model ?? "provider-default"} voice=${realtimeConfig?.voice ?? "provider-default"}`);
|
|
561
|
+
return stream;
|
|
562
|
+
}
|
|
563
|
+
clearOutputAudio() {
|
|
564
|
+
this.resetOutputStream();
|
|
565
|
+
this.params.entry.player.stop(true);
|
|
566
|
+
}
|
|
567
|
+
resetOutputStream() {
|
|
568
|
+
const stream = this.outputStream;
|
|
569
|
+
this.outputStream = null;
|
|
570
|
+
stream?.end();
|
|
571
|
+
stream?.destroy();
|
|
572
|
+
}
|
|
573
|
+
handleToolCall(event, session) {
|
|
574
|
+
const callId = event.callId || event.itemId;
|
|
575
|
+
if (this.params.mode !== "bidi") {
|
|
576
|
+
session.submitToolResult(callId, { error: `Tool "${event.name}" is only available in bidi Discord voice mode` });
|
|
577
|
+
return;
|
|
578
|
+
}
|
|
579
|
+
if (event.name !== REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
|
|
580
|
+
session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
|
|
581
|
+
return;
|
|
582
|
+
}
|
|
583
|
+
if (this.consultToolPolicy === "none") {
|
|
584
|
+
session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
|
|
585
|
+
return;
|
|
586
|
+
}
|
|
587
|
+
const consultMessage = buildRealtimeVoiceAgentConsultChatMessage(event.args);
|
|
588
|
+
logger$2.info(`discord voice: realtime consult requested call=${callId || "unknown"} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} question=${formatRealtimeLogPreview(consultMessage)}`);
|
|
589
|
+
if (session.bridge.supportsToolResultContinuation) session.submitToolResult(callId, buildRealtimeVoiceAgentConsultWorkingResponse("speaker"), { willContinue: true });
|
|
590
|
+
const context = this.consumePendingSpeakerContext();
|
|
591
|
+
if (!context) {
|
|
592
|
+
logger$2.warn(`discord voice: realtime consult has no speaker context call=${callId || "unknown"}`);
|
|
593
|
+
session.submitToolResult(callId, { error: "No Discord speaker context available" });
|
|
594
|
+
return;
|
|
595
|
+
}
|
|
596
|
+
this.runAgentTurn({
|
|
597
|
+
context,
|
|
598
|
+
message: consultMessage
|
|
599
|
+
}).then((text) => {
|
|
600
|
+
logger$2.info(`discord voice: realtime consult answer (${text.length} chars) voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel} owner=${context.senderIsOwner}: ${formatRealtimeLogPreview(text)}`);
|
|
601
|
+
session.submitToolResult(callId, { text });
|
|
602
|
+
}).catch((error) => {
|
|
603
|
+
logger$2.warn(`discord voice: realtime consult failed call=${callId || "unknown"}: ${formatErrorMessage(error)}`);
|
|
604
|
+
session.submitToolResult(callId, { error: formatErrorMessage(error) });
|
|
605
|
+
});
|
|
606
|
+
}
|
|
607
|
+
async runAgentTurn(params) {
|
|
608
|
+
const context = params.context;
|
|
609
|
+
if (!context) return "";
|
|
610
|
+
return this.params.runAgentTurn({
|
|
611
|
+
context,
|
|
612
|
+
message: params.message,
|
|
613
|
+
toolsAllow: this.params.mode === "bidi" ? this.consultToolsAllow : void 0,
|
|
614
|
+
userId: context.userId
|
|
615
|
+
});
|
|
616
|
+
}
|
|
617
|
+
consumePendingSpeakerContext() {
|
|
618
|
+
this.prunePendingSpeakerTurns();
|
|
619
|
+
this.expireClosedSpeakerTurnsBeforeLaterAudio();
|
|
620
|
+
const index = this.pendingSpeakerTurns.findIndex((turn) => turn.hasAudio);
|
|
621
|
+
if (index < 0) return;
|
|
622
|
+
const [turn] = this.pendingSpeakerTurns.splice(index, 1);
|
|
623
|
+
this.prunePendingSpeakerTurns();
|
|
624
|
+
return turn?.context;
|
|
625
|
+
}
|
|
626
|
+
prunePendingSpeakerTurns() {
|
|
627
|
+
for (let index = this.pendingSpeakerTurns.length - 1; index >= 0; index -= 1) {
|
|
628
|
+
const turn = this.pendingSpeakerTurns[index];
|
|
629
|
+
if (turn?.closed && !turn.hasAudio) this.pendingSpeakerTurns.splice(index, 1);
|
|
630
|
+
}
|
|
631
|
+
while (this.pendingSpeakerTurns.length > DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT) {
|
|
632
|
+
const completedIndex = this.pendingSpeakerTurns.findIndex((turn) => turn.closed);
|
|
633
|
+
this.pendingSpeakerTurns.splice(Math.max(completedIndex, 0), 1);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
expireClosedSpeakerTurnsBeforeLaterAudio() {
|
|
637
|
+
let hasLaterAudio = false;
|
|
638
|
+
for (let index = this.pendingSpeakerTurns.length - 1; index >= 0; index -= 1) {
|
|
639
|
+
const turn = this.pendingSpeakerTurns[index];
|
|
640
|
+
if (!turn?.hasAudio) continue;
|
|
641
|
+
if (turn.closed && hasLaterAudio) {
|
|
642
|
+
this.pendingSpeakerTurns.splice(index, 1);
|
|
643
|
+
continue;
|
|
644
|
+
}
|
|
645
|
+
hasLaterAudio = true;
|
|
646
|
+
}
|
|
647
|
+
}
|
|
648
|
+
};
|
|
649
|
+
/**
 * Type guard for a realtime speaker context.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} True when `value` is a non-null object with a string `userId`,
 *   a boolean `senderIsOwner`, and a string `speakerLabel`.
 */
function isDiscordRealtimeSpeakerContext(value) {
	if (!value || typeof value !== "object") return false;
	if (typeof value.userId !== "string") return false;
	if (typeof value.senderIsOwner !== "boolean") return false;
	return typeof value.speakerLabel === "string";
}
|
|
652
|
+
/**
 * Returns a shallow copy of `realtimeConfig.providers`.
 *
 * @param {{ providers?: object }|undefined} realtimeConfig
 * @returns {object|undefined} The copy, or `undefined` when providers is missing or has no keys.
 */
function buildProviderConfigs(realtimeConfig) {
	const providers = realtimeConfig?.providers;
	if (!providers) return void 0;
	if (!(Object.keys(providers).length > 0)) return void 0;
	return { ...providers };
}
|
|
656
|
+
/**
 * Collects the provider overrides present on the realtime config.
 *
 * `model` and `voice` are included when truthy; `minBargeInAudioEndMs` is
 * included whenever it is a number (0 included).
 *
 * @param {object|undefined} realtimeConfig
 * @returns {object|undefined} The overrides, or `undefined` when none apply.
 */
function buildProviderConfigOverrides(realtimeConfig) {
	const overrides = {};
	if (realtimeConfig?.model) overrides.model = realtimeConfig.model;
	if (realtimeConfig?.voice) overrides.voice = realtimeConfig.voice;
	if (typeof realtimeConfig?.minBargeInAudioEndMs === "number") {
		overrides.minBargeInAudioEndMs = realtimeConfig.minBargeInAudioEndMs;
	}
	if (Object.keys(overrides).length > 0) return overrides;
	return void 0;
}
|
|
664
|
+
/**
 * Returns the configured `minBargeInAudioEndMs` when it is a number, otherwise
 * DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS.
 *
 * @param {object|undefined} realtimeConfig
 * @returns {number}
 */
function resolveDiscordRealtimeMinBargeInAudioEndMs(realtimeConfig) {
	const configured = realtimeConfig?.minBargeInAudioEndMs;
	if (typeof configured === "number") return configured;
	return DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS;
}
|
|
667
|
+
/**
 * Builds the instruction text for the realtime voice model.
 *
 * Starts from `params.instructions` (or a built-in two-line default). In agent
 * proxy mode, three fixed proxy-mode paragraphs are appended. Otherwise the
 * consult policy instructions are appended, and empty sections are dropped.
 * Sections are separated by a blank line.
 *
 * @param {{ instructions?: string, mode: *, toolPolicy: *, consultPolicy: * }} params
 * @returns {string}
 */
function buildDiscordRealtimeInstructions(params) {
	const sections = [params.instructions ?? ["You are OpenClaw's Discord voice interface.", "Keep spoken replies concise, natural, and suitable for a live Discord voice channel."].join("\n")];
	if (isDiscordAgentProxyVoiceMode(params.mode)) {
		sections.push(
			"Mode: OpenClaw agent proxy.",
			"Use audio input only to transcribe the speaker. Do not answer user speech by yourself.",
			"When OpenClaw sends an exact answer to speak, say only that answer."
		);
		// No filtering in proxy mode: the base section is kept even when empty.
		return sections.join("\n\n");
	}
	const { toolPolicy, consultPolicy } = params;
	sections.push(buildRealtimeVoiceAgentConsultPolicyInstructions({ toolPolicy, consultPolicy }));
	return sections.filter(Boolean).join("\n\n");
}
|
|
196
680
|
//#endregion
|
|
197
681
|
//#region extensions/discord/src/voice/receive-recovery.ts
|
|
198
682
|
const DECRYPT_FAILURE_WINDOW_MS = 3e4;
|
|
@@ -261,53 +745,8 @@ function enableDaveReceivePassthrough(params) {
|
|
|
261
745
|
}
|
|
262
746
|
}
|
|
263
747
|
//#endregion
|
|
264
|
-
//#region extensions/discord/src/voice/sdk-runtime.ts
|
|
265
|
-
/** Module-level cache for the lazily required `@discordjs/voice` module. */
let cachedDiscordVoiceSdk = null;
/**
 * Returns the `@discordjs/voice` module, requiring it on first use and
 * reusing the cached module afterwards.
 *
 * @returns {object} The loaded voice SDK module.
 */
function loadDiscordVoiceSdk() {
	if (!cachedDiscordVoiceSdk) {
		const requireFromHere = createRequire(import.meta.url);
		cachedDiscordVoiceSdk = requireFromHere("@discordjs/voice");
	}
	return cachedDiscordVoiceSdk;
}
|
|
271
|
-
//#endregion
|
|
272
|
-
//#region extensions/discord/src/voice/prompt.ts
|
|
273
|
-
const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = [
|
|
274
|
-
"You are OpenClaw's Discord voice interface in a live voice channel.",
|
|
275
|
-
"Discord voice reply requirements:",
|
|
276
|
-
"- Return only the concise text that should be spoken aloud in the voice channel.",
|
|
277
|
-
"- Treat the transcript as speech-to-text from a live conversation; repair obvious transcription artifacts and ignore repeated partial fragments caused by voice buffering.",
|
|
278
|
-
"- If the transcript is garbled, incomplete, or missing the user's intent, ask one brief clarifying question instead of guessing.",
|
|
279
|
-
"- If the request needs deeper reasoning, current information, or tools, use the available tools before answering.",
|
|
280
|
-
"- Do not call the tts tool; Discord voice will synthesize and play the returned text.",
|
|
281
|
-
"- Do not reply with NO_REPLY unless no spoken response is appropriate.",
|
|
282
|
-
"- Keep the response brief, natural, and conversational. Prefer one to three short sentences.",
|
|
283
|
-
"- Avoid markdown tables, code fences, citations, and visual formatting unless the user explicitly asks for something that cannot be spoken naturally."
|
|
284
|
-
].join("\n");
|
|
285
|
-
/**
 * Builds the agent prompt for a voice transcript: the spoken-output contract,
 * a blank line, then the trimmed transcript, prefixed with a speaker header
 * line when a non-empty speaker label is given.
 *
 * @param {string} transcript - Speech-to-text output.
 * @param {string} [speakerLabel] - Optional display label for the speaker.
 * @returns {string}
 */
function formatVoiceIngressPrompt(transcript, speakerLabel) {
	const body = transcript.trim();
	const label = speakerLabel?.trim();
	const transcriptSection = label ? `Voice transcript from speaker "${label}":\n${body}` : body;
	return `${DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT}\n\n${transcriptSection}`;
}
|
|
290
|
-
const CAPTURE_FINALIZE_GRACE_MS = 2500;
|
|
291
|
-
const VOICE_CONNECT_READY_TIMEOUT_MS = 3e4;
|
|
292
|
-
const VOICE_RECONNECT_GRACE_MS = 15e3;
|
|
293
|
-
const PLAYBACK_READY_TIMEOUT_MS = 6e4;
|
|
294
|
-
const SPEAKING_READY_TIMEOUT_MS = 6e4;
|
|
295
|
-
/**
 * Normalizes a timeout value.
 *
 * @param {*} value - Candidate timeout in milliseconds.
 * @param {number} fallbackMs - Returned when `value` is not a finite number greater than 0.
 * @returns {number} `value` rounded down to an integer, or `fallbackMs`.
 */
function resolveVoiceTimeoutMs(value, fallbackMs) {
	const usable = typeof value === "number" && Number.isFinite(value) && value > 0;
	return usable ? Math.floor(value) : fallbackMs;
}
|
|
299
|
-
/**
 * Emits a verbose log line prefixed with "discord voice: ".
 *
 * @param {string} message - Text to log after the prefix.
 */
function logVoiceVerbose(message) {
	const line = `discord voice: ${message}`;
	logVerbose(line);
}
|
|
302
|
-
/**
 * Tells whether a channel type is a guild voice or guild stage voice channel.
 *
 * @param {*} type - Channel type value to compare against `discord_exports.ChannelType`.
 * @returns {boolean}
 */
function isVoiceChannel(type) {
	const channelTypes = discord_exports.ChannelType;
	if (type === channelTypes.GuildVoice) return true;
	return type === channelTypes.GuildStageVoice;
}
|
|
305
|
-
//#endregion
|
|
306
748
|
//#region extensions/discord/src/voice/sanitize.ts
|
|
307
749
|
const SPEECH_EMOJI_RE = /(?:\p{Extended_Pictographic}(?:\uFE0F|\u200D|\p{Extended_Pictographic}|\p{Emoji_Modifier})*)+/gu;
|
|
308
|
-
/**
 * Escapes regular-expression metacharacters in a string by prefixing each
 * with a backslash, so the result can be embedded literally in a RegExp source.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
	const metacharacters = /[.*+?^${}()|[\]\\]/g;
	return value.replace(metacharacters, (matched) => `\\${matched}`);
}
|
|
311
750
|
/**
 * Removes emoji sequences from text that is about to be spoken and tidies the
 * whitespace left behind.
 *
 * @param {string} text
 * @returns {string}
 */
function stripEmojiForSpeech(text) {
	// Each emoji run becomes a single space so neighbouring words stay separated.
	const withoutEmoji = text.replace(SPEECH_EMOJI_RE, " ");
	// Pull punctuation back against the preceding word.
	const punctuationTightened = withoutEmoji.replace(/\s+([?!.,:;])/g, "$1");
	const spacesCollapsed = punctuationTightened.replace(/[ \t]{2,}/g, " ");
	const newlinesTrimmed = spacesCollapsed.replace(/ *\n */g, "\n");
	return newlinesTrimmed.trim();
}
|
|
@@ -384,7 +823,22 @@ async function synthesizeVoiceReplyAudio(params) {
|
|
|
384
823
|
});
|
|
385
824
|
const speakText = sanitizeVoiceReplyTextForSpeech(directive.overrides.ttsText ?? directive.cleanedText.trim(), params.speakerLabel);
|
|
386
825
|
if (!speakText) return { status: "empty" };
|
|
387
|
-
const
|
|
826
|
+
const runtime = getDiscordRuntime();
|
|
827
|
+
const streamResult = await runtime.tts.textToSpeechStream?.({
|
|
828
|
+
text: speakText,
|
|
829
|
+
cfg: ttsCfg,
|
|
830
|
+
channel: "discord",
|
|
831
|
+
overrides: directive.overrides,
|
|
832
|
+
disableFallback: true
|
|
833
|
+
});
|
|
834
|
+
if (streamResult?.success && streamResult.audioStream) return {
|
|
835
|
+
status: "ok",
|
|
836
|
+
mode: "stream",
|
|
837
|
+
audioStream: streamResult.audioStream,
|
|
838
|
+
release: streamResult.release,
|
|
839
|
+
speakText
|
|
840
|
+
};
|
|
841
|
+
const result = await runtime.tts.textToSpeech({
|
|
388
842
|
text: speakText,
|
|
389
843
|
cfg: ttsCfg,
|
|
390
844
|
channel: "discord",
|
|
@@ -396,39 +850,34 @@ async function synthesizeVoiceReplyAudio(params) {
|
|
|
396
850
|
};
|
|
397
851
|
return {
|
|
398
852
|
status: "ok",
|
|
853
|
+
mode: "file",
|
|
399
854
|
audioPath: result.audioPath,
|
|
400
855
|
speakText
|
|
401
856
|
};
|
|
402
857
|
}
|
|
403
858
|
//#endregion
|
|
404
859
|
//#region extensions/discord/src/voice/segment.ts
|
|
405
|
-
const
|
|
860
|
+
const VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS = 500;
|
|
406
861
|
const logger$1 = createSubsystemLogger("discord/voice");
|
|
862
|
+
/**
 * Produces a single-line preview of a transcript for log output.
 *
 * Whitespace runs are collapsed to one space and the ends are trimmed. Text
 * longer than VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS UTF-16 code units is cut at
 * that length and suffixed with "...".
 *
 * @param {string} text - Transcript text.
 * @returns {string} The preview.
 */
function formatVoiceTranscriptLogPreview(text) {
	const oneLine = text.replace(/\s+/g, " ").trim();
	if (oneLine.length <= VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS) return oneLine;
	let end = VOICE_TRANSCRIPT_LOG_PREVIEW_CHARS;
	// Do not cut an astral character (e.g. an emoji) in half: if the last kept
	// code unit is a high surrogate, drop it rather than emit a lone surrogate.
	const lastUnit = oneLine.charCodeAt(end - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
	return `${oneLine.slice(0, end)}...`;
}
|
|
407
867
|
async function processDiscordVoiceSegment(params) {
|
|
408
868
|
const { entry, wavPath, userId, durationSeconds } = params;
|
|
409
869
|
logVoiceVerbose(`segment processing (${durationSeconds.toFixed(2)}s): guild ${entry.guildId} channel ${entry.channelId}`);
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
const access = await authorizeDiscordVoiceIngress({
|
|
870
|
+
const ingress = await resolveDiscordVoiceIngressContext({
|
|
871
|
+
entry,
|
|
872
|
+
userId,
|
|
414
873
|
cfg: params.cfg,
|
|
415
874
|
discordConfig: params.discordConfig,
|
|
416
|
-
guildName: entry.guildName,
|
|
417
|
-
guildId: entry.guildId,
|
|
418
|
-
channelId: entry.channelId,
|
|
419
|
-
channelName: entry.channelName,
|
|
420
|
-
channelSlug: entry.channelName ? normalizeDiscordSlug(entry.channelName) : "",
|
|
421
|
-
channelLabel: formatMention({ channelId: entry.channelId }),
|
|
422
|
-
memberRoleIds: speakerIdentity.memberRoleIds,
|
|
423
875
|
ownerAllowFrom: params.ownerAllowFrom,
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
name: speakerIdentity.name,
|
|
427
|
-
tag: speakerIdentity.tag
|
|
428
|
-
}
|
|
876
|
+
fetchGuildName: params.fetchGuildName,
|
|
877
|
+
speakerContext: params.speakerContext
|
|
429
878
|
});
|
|
430
|
-
if (!
|
|
431
|
-
logVoiceVerbose(`segment unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}
|
|
879
|
+
if (!ingress) {
|
|
880
|
+
logVoiceVerbose(`segment unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
432
881
|
return;
|
|
433
882
|
}
|
|
434
883
|
const transcript = await transcribeVoiceAudio({
|
|
@@ -441,21 +890,24 @@ async function processDiscordVoiceSegment(params) {
|
|
|
441
890
|
return;
|
|
442
891
|
}
|
|
443
892
|
logVoiceVerbose(`transcription ok (${transcript.length} chars): guild ${entry.guildId} channel ${entry.channelId}`);
|
|
444
|
-
|
|
445
|
-
const
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
message:
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
893
|
+
logVoiceVerbose(`transcript from ${ingress.speakerLabel} (${userId}) in guild ${entry.guildId} channel ${entry.channelId}: ${formatVoiceTranscriptLogPreview(transcript)}`);
|
|
894
|
+
const turn = await runDiscordVoiceAgentTurn({
|
|
895
|
+
entry,
|
|
896
|
+
userId,
|
|
897
|
+
message: formatVoiceIngressPrompt(transcript, ingress.speakerLabel),
|
|
898
|
+
cfg: params.cfg,
|
|
899
|
+
discordConfig: params.discordConfig,
|
|
900
|
+
runtime: params.runtime,
|
|
901
|
+
context: ingress,
|
|
902
|
+
ownerAllowFrom: params.ownerAllowFrom,
|
|
903
|
+
fetchGuildName: params.fetchGuildName,
|
|
904
|
+
speakerContext: params.speakerContext
|
|
905
|
+
});
|
|
906
|
+
if (!turn) {
|
|
907
|
+
logVoiceVerbose(`segment unauthorized before agent turn: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
908
|
+
return;
|
|
909
|
+
}
|
|
910
|
+
const replyText = turn.text;
|
|
459
911
|
if (!replyText) {
|
|
460
912
|
logVoiceVerbose(`reply empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
461
913
|
return;
|
|
@@ -465,7 +917,7 @@ async function processDiscordVoiceSegment(params) {
|
|
|
465
917
|
cfg: params.cfg,
|
|
466
918
|
override: params.discordConfig.voice?.tts,
|
|
467
919
|
replyText,
|
|
468
|
-
speakerLabel:
|
|
920
|
+
speakerLabel: ingress.speakerLabel
|
|
469
921
|
});
|
|
470
922
|
if (voiceReplyAudio.status === "empty") {
|
|
471
923
|
logVoiceVerbose(`tts skipped (empty): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
@@ -477,13 +929,25 @@ async function processDiscordVoiceSegment(params) {
|
|
|
477
929
|
}
|
|
478
930
|
logVoiceVerbose(`tts ok (${voiceReplyAudio.speakText.length} chars): guild ${entry.guildId} channel ${entry.channelId}`);
|
|
479
931
|
params.enqueuePlayback(entry, async () => {
|
|
480
|
-
logVoiceVerbose(`playback start: guild ${entry.guildId} channel ${entry.channelId} file ${path.basename(voiceReplyAudio.audioPath)}`);
|
|
481
932
|
const voiceSdk = loadDiscordVoiceSdk();
|
|
482
|
-
const
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
933
|
+
const releaseAudioStream = voiceReplyAudio.mode === "stream" ? voiceReplyAudio.release : void 0;
|
|
934
|
+
try {
|
|
935
|
+
if (voiceReplyAudio.mode === "stream") {
|
|
936
|
+
logVoiceVerbose(`playback start: guild ${entry.guildId} channel ${entry.channelId} stream`);
|
|
937
|
+
const nodeStream = Readable.fromWeb(voiceReplyAudio.audioStream);
|
|
938
|
+
const resource = voiceSdk.createAudioResource(nodeStream);
|
|
939
|
+
entry.player.play(resource);
|
|
940
|
+
} else {
|
|
941
|
+
logVoiceVerbose(`playback start: guild ${entry.guildId} channel ${entry.channelId} file ${path.basename(voiceReplyAudio.audioPath)}`);
|
|
942
|
+
const resource = voiceSdk.createAudioResource(voiceReplyAudio.audioPath);
|
|
943
|
+
entry.player.play(resource);
|
|
944
|
+
}
|
|
945
|
+
await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Playing, PLAYBACK_READY_TIMEOUT_MS).catch(() => void 0);
|
|
946
|
+
await voiceSdk.entersState(entry.player, voiceSdk.AudioPlayerStatus.Idle, SPEAKING_READY_TIMEOUT_MS).catch(() => void 0);
|
|
947
|
+
logVoiceVerbose(`playback done: guild ${entry.guildId} channel ${entry.channelId}`);
|
|
948
|
+
} finally {
|
|
949
|
+
await releaseAudioStream?.();
|
|
950
|
+
}
|
|
487
951
|
});
|
|
488
952
|
}
|
|
489
953
|
//#endregion
|
|
@@ -580,6 +1044,12 @@ var DiscordVoiceSpeakerContextResolver = class {
|
|
|
580
1044
|
//#endregion
|
|
581
1045
|
//#region extensions/discord/src/voice/manager.ts
|
|
582
1046
|
const logger = createSubsystemLogger("discord/voice");
|
|
1047
|
+
/** Maximum preview length, in UTF-16 code units, before truncation. */
const VOICE_LOG_PREVIEW_CHARS = 500;
/**
 * Produces a single-line preview of arbitrary text for log output.
 *
 * Whitespace runs are collapsed to one space and the ends are trimmed. Text
 * longer than VOICE_LOG_PREVIEW_CHARS code units is cut at that length and
 * suffixed with "...".
 *
 * @param {string} text - Text to preview.
 * @returns {string} The preview.
 */
function formatVoiceLogPreview(text) {
	const oneLine = text.replace(/\s+/g, " ").trim();
	if (oneLine.length <= VOICE_LOG_PREVIEW_CHARS) return oneLine;
	let end = VOICE_LOG_PREVIEW_CHARS;
	// Do not cut an astral character (e.g. an emoji) in half: if the last kept
	// code unit is a high surrogate, drop it rather than emit a lone surrogate.
	const lastUnit = oneLine.charCodeAt(end - 1);
	if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
	return `${oneLine.slice(0, end)}...`;
}
|
|
583
1053
|
/**
 * Tells whether a voice connection's current state status equals the SDK's
 * `VoiceConnectionStatus.Destroyed` value.
 *
 * @param {{ state: { status: * } }} connection
 * @param {{ VoiceConnectionStatus: { Destroyed: * } }} voiceSdk
 * @returns {boolean}
 */
function isVoiceConnectionDestroyed(connection, voiceSdk) {
	const { status } = connection.state;
	return status === voiceSdk.VoiceConnectionStatus.Destroyed;
}
|
|
@@ -602,6 +1072,44 @@ function destroyVoiceConnectionSafely(params) {
|
|
|
602
1072
|
/**
 * Kicks off the manager's auto-join without awaiting it. A rejection is
 * logged as a warning instead of propagating.
 *
 * @param {{ autoJoin: () => Promise<*> }} manager
 */
function startAutoJoin(manager) {
	const reportFailure = (err) => logger.warn(`discord voice: autoJoin failed: ${formatErrorMessage(err)}`);
	manager.autoJoin().catch(reportFailure);
}
|
|
1075
|
+
/**
 * Resolves which agent route a voice session should use.
 *
 * The voice route is always resolved for the session channel. Unless
 * `voiceConfig.agentSession.mode` is "target", that route is also the agent
 * route. In "target" mode the configured target is parsed (defaulting to a
 * channel target) and a second route is resolved for it; a user target maps to
 * a "direct" peer, anything else to a "channel" peer.
 *
 * @param {{ cfg: *, accountId: *, guildId: *, sessionChannelId: *, voiceConfig?: object }} params
 * @returns {{ route: *, voiceRoute: *, agentSessionMode: "voice"|"target", agentSessionTarget: * }}
 * @throws {Error} In "target" mode when the target is empty or cannot be parsed.
 */
function resolveDiscordVoiceAgentRoute(params) {
	// Both lookups share everything except the peer.
	const routeForPeer = (peer) => resolveAgentRoute({
		cfg: params.cfg,
		channel: "discord",
		accountId: params.accountId,
		guildId: params.guildId,
		peer
	});
	const voiceRoute = routeForPeer({
		kind: "channel",
		id: params.sessionChannelId
	});
	const sessionConfig = params.voiceConfig?.agentSession;
	if (sessionConfig?.mode !== "target") {
		return {
			route: voiceRoute,
			voiceRoute,
			agentSessionMode: "voice",
			agentSessionTarget: void 0
		};
	}
	const target = sessionConfig.target?.trim();
	if (!target) throw new Error("channels.discord.voice.agentSession.target is required when mode is \"target\"");
	const parsed = parseDiscordTarget(target, { defaultKind: "channel" });
	if (!parsed) throw new Error(`Invalid Discord voice agent session target "${target}"`);
	const peerKind = parsed.kind === "user" ? "direct" : "channel";
	const route = routeForPeer({
		kind: peerKind,
		id: parsed.id
	});
	return {
		route,
		voiceRoute,
		agentSessionMode: "target",
		agentSessionTarget: parsed.normalized
	};
}
|
|
605
1113
|
var DiscordVoiceManager$1 = class {
|
|
606
1114
|
constructor(params) {
|
|
607
1115
|
this.params = params;
|
|
@@ -629,17 +1137,25 @@ var DiscordVoiceManager$1 = class {
|
|
|
629
1137
|
if (this.autoJoinTask) return this.autoJoinTask;
|
|
630
1138
|
this.autoJoinTask = (async () => {
|
|
631
1139
|
const entries = this.params.discordConfig.voice?.autoJoin ?? [];
|
|
632
|
-
|
|
633
|
-
const
|
|
1140
|
+
const entriesByGuild = /* @__PURE__ */ new Map();
|
|
1141
|
+
const duplicateGuilds = /* @__PURE__ */ new Set();
|
|
634
1142
|
for (const entry of entries) {
|
|
635
1143
|
const guildId = entry.guildId.trim();
|
|
636
|
-
|
|
637
|
-
if (
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
1144
|
+
const channelId = entry.channelId.trim();
|
|
1145
|
+
if (!guildId || !channelId) continue;
|
|
1146
|
+
if (entriesByGuild.has(guildId)) duplicateGuilds.add(guildId);
|
|
1147
|
+
entriesByGuild.set(guildId, {
|
|
1148
|
+
guildId,
|
|
1149
|
+
channelId
|
|
1150
|
+
});
|
|
1151
|
+
}
|
|
1152
|
+
logVoiceVerbose(`autoJoin: ${entries.length} entries, ${entriesByGuild.size} guilds`);
|
|
1153
|
+
for (const guildId of duplicateGuilds) {
|
|
1154
|
+
const selected = entriesByGuild.get(guildId);
|
|
1155
|
+
if (selected) logger.warn(`discord voice: autoJoin has multiple entries for guild ${guildId}; using channel ${selected.channelId}`);
|
|
1156
|
+
}
|
|
1157
|
+
for (const entry of entriesByGuild.values()) {
|
|
1158
|
+
logVoiceVerbose(`autoJoin: joining guild ${entry.guildId} channel ${entry.channelId}`);
|
|
643
1159
|
await this.join({
|
|
644
1160
|
guildId: entry.guildId,
|
|
645
1161
|
channelId: entry.channelId
|
|
@@ -700,6 +1216,7 @@ var DiscordVoiceManager$1 = class {
|
|
|
700
1216
|
message: "Discord voice plugin is not available."
|
|
701
1217
|
};
|
|
702
1218
|
const voiceConfig = this.params.discordConfig.voice;
|
|
1219
|
+
const voiceMode = resolveDiscordVoiceMode(voiceConfig);
|
|
703
1220
|
const adapterCreator = voicePlugin.getGatewayAdapterCreator(guildId);
|
|
704
1221
|
const daveEncryption = voiceConfig?.daveEncryption;
|
|
705
1222
|
const decryptionFailureTolerance = voiceConfig?.decryptionFailureTolerance;
|
|
@@ -744,16 +1261,30 @@ var DiscordVoiceManager$1 = class {
|
|
|
744
1261
|
}
|
|
745
1262
|
const sessionChannelId = channelInfo?.id ?? channelId;
|
|
746
1263
|
if (sessionChannelId !== channelId) logVoiceVerbose(`join: using session channel ${sessionChannelId} for voice channel ${channelId}`);
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
}
|
|
756
|
-
})
|
|
1264
|
+
let routeInfo;
|
|
1265
|
+
try {
|
|
1266
|
+
routeInfo = resolveDiscordVoiceAgentRoute({
|
|
1267
|
+
cfg: this.params.cfg,
|
|
1268
|
+
accountId: this.params.accountId,
|
|
1269
|
+
guildId,
|
|
1270
|
+
sessionChannelId,
|
|
1271
|
+
voiceConfig
|
|
1272
|
+
});
|
|
1273
|
+
} catch (err) {
|
|
1274
|
+
destroyVoiceConnectionSafely({
|
|
1275
|
+
connection,
|
|
1276
|
+
voiceSdk,
|
|
1277
|
+
reason: `voice agent session route failed guild ${guildId} channel ${channelId}`
|
|
1278
|
+
});
|
|
1279
|
+
return {
|
|
1280
|
+
ok: false,
|
|
1281
|
+
message: `Failed to resolve Discord voice agent session: ${formatErrorMessage(err)}`,
|
|
1282
|
+
guildId,
|
|
1283
|
+
channelId
|
|
1284
|
+
};
|
|
1285
|
+
}
|
|
1286
|
+
const { route, voiceRoute, agentSessionMode, agentSessionTarget } = routeInfo;
|
|
1287
|
+
logger.info(`discord voice: joining guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} agentSessionMode=${agentSessionMode}${agentSessionTarget ? ` agentSessionTarget=${agentSessionTarget}` : ""} voiceModel=${voiceConfig?.model ?? "route-default"} realtimeProvider=${voiceConfig?.realtime?.provider ?? "auto"} realtimeModel=${voiceConfig?.realtime?.model ?? "provider-default"} realtimeVoice=${voiceConfig?.realtime?.voice ?? "provider-default"}`);
|
|
757
1288
|
const player = voiceSdk.createAudioPlayer();
|
|
758
1289
|
connection.subscribe(player);
|
|
759
1290
|
let speakingHandler;
|
|
@@ -761,15 +1292,35 @@ var DiscordVoiceManager$1 = class {
|
|
|
761
1292
|
let disconnectedHandler;
|
|
762
1293
|
let destroyedHandler;
|
|
763
1294
|
let playerErrorHandler;
|
|
1295
|
+
let stopped = false;
|
|
764
1296
|
const clearSessionIfCurrent = () => {
|
|
765
1297
|
if (this.sessions.get(guildId)?.connection === connection) this.sessions.delete(guildId);
|
|
766
1298
|
};
|
|
1299
|
+
const stopEntry = (entry, options) => {
|
|
1300
|
+
if (stopped) return;
|
|
1301
|
+
stopped = true;
|
|
1302
|
+
if (speakingHandler) connection.receiver.speaking.off("start", speakingHandler);
|
|
1303
|
+
if (speakingEndHandler) connection.receiver.speaking.off("end", speakingEndHandler);
|
|
1304
|
+
stopVoiceCaptureState(entry.capture);
|
|
1305
|
+
if (disconnectedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Disconnected, disconnectedHandler);
|
|
1306
|
+
if (destroyedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
|
|
1307
|
+
if (playerErrorHandler) player.off("error", playerErrorHandler);
|
|
1308
|
+
entry.realtime?.close();
|
|
1309
|
+
entry.realtime = void 0;
|
|
1310
|
+
player.stop();
|
|
1311
|
+
if (options.destroyConnection) destroyVoiceConnectionSafely({
|
|
1312
|
+
connection,
|
|
1313
|
+
voiceSdk,
|
|
1314
|
+
reason: options.reason
|
|
1315
|
+
});
|
|
1316
|
+
};
|
|
767
1317
|
const entry = {
|
|
768
1318
|
guildId,
|
|
769
1319
|
guildName: channelInfo && "guild" in channelInfo && channelInfo.guild && typeof channelInfo.guild.name === "string" ? channelInfo.guild.name : void 0,
|
|
770
1320
|
channelId,
|
|
771
1321
|
channelName: channelInfo && "name" in channelInfo && typeof channelInfo.name === "string" ? channelInfo.name : void 0,
|
|
772
1322
|
sessionChannelId,
|
|
1323
|
+
voiceSessionKey: voiceRoute.sessionKey,
|
|
773
1324
|
route,
|
|
774
1325
|
connection,
|
|
775
1326
|
player,
|
|
@@ -778,20 +1329,43 @@ var DiscordVoiceManager$1 = class {
|
|
|
778
1329
|
capture: createVoiceCaptureState(),
|
|
779
1330
|
receiveRecovery: createVoiceReceiveRecoveryState(),
|
|
780
1331
|
stop: () => {
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
1332
|
+
stopEntry(entry, {
|
|
1333
|
+
destroyConnection: true,
|
|
1334
|
+
reason: `stop guild ${guildId} channel ${channelId}`
|
|
1335
|
+
});
|
|
1336
|
+
}
|
|
1337
|
+
};
|
|
1338
|
+
if (voiceMode !== "stt-tts") {
|
|
1339
|
+
entry.realtime = new DiscordRealtimeVoiceSession({
|
|
1340
|
+
cfg: this.params.cfg,
|
|
1341
|
+
discordConfig: this.params.discordConfig,
|
|
1342
|
+
entry,
|
|
1343
|
+
mode: voiceMode,
|
|
1344
|
+
runAgentTurn: ({ context, message, toolsAllow, userId }) => this.runDiscordRealtimeAgentTurn({
|
|
1345
|
+
context,
|
|
1346
|
+
entry,
|
|
1347
|
+
message,
|
|
1348
|
+
toolsAllow,
|
|
1349
|
+
userId
|
|
1350
|
+
})
|
|
1351
|
+
});
|
|
1352
|
+
try {
|
|
1353
|
+
await entry.realtime.connect();
|
|
1354
|
+
} catch (err) {
|
|
1355
|
+
entry.realtime.close();
|
|
788
1356
|
destroyVoiceConnectionSafely({
|
|
789
1357
|
connection,
|
|
790
1358
|
voiceSdk,
|
|
791
|
-
reason: `
|
|
1359
|
+
reason: `realtime setup failed guild ${guildId} channel ${channelId}`
|
|
792
1360
|
});
|
|
1361
|
+
return {
|
|
1362
|
+
ok: false,
|
|
1363
|
+
message: `Failed to start Discord realtime voice: ${formatErrorMessage(err)}`,
|
|
1364
|
+
guildId,
|
|
1365
|
+
channelId
|
|
1366
|
+
};
|
|
793
1367
|
}
|
|
794
|
-
}
|
|
1368
|
+
}
|
|
795
1369
|
speakingHandler = (userId) => {
|
|
796
1370
|
this.handleSpeakingStart(entry, userId).catch((err) => {
|
|
797
1371
|
logger.warn(`discord voice: capture failed: ${formatErrorMessage(err)}`);
|
|
@@ -808,15 +1382,18 @@ var DiscordVoiceManager$1 = class {
|
|
|
808
1382
|
} catch (err) {
|
|
809
1383
|
logger.warn(`discord voice: disconnect recovery failed: guild ${guildId} channel ${channelId} timeout=${reconnectGraceMs}ms error=${formatErrorMessage(err)}; destroying connection`);
|
|
810
1384
|
clearSessionIfCurrent();
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
voiceSdk,
|
|
1385
|
+
stopEntry(entry, {
|
|
1386
|
+
destroyConnection: true,
|
|
814
1387
|
reason: `disconnect recovery failed guild ${guildId} channel ${channelId}`
|
|
815
1388
|
});
|
|
816
1389
|
}
|
|
817
1390
|
};
|
|
818
1391
|
destroyedHandler = () => {
|
|
819
1392
|
clearSessionIfCurrent();
|
|
1393
|
+
stopEntry(entry, {
|
|
1394
|
+
destroyConnection: false,
|
|
1395
|
+
reason: `destroyed guild ${guildId} channel ${channelId}`
|
|
1396
|
+
});
|
|
820
1397
|
};
|
|
821
1398
|
playerErrorHandler = (err) => {
|
|
822
1399
|
logger.warn(`discord voice: playback error: ${formatErrorMessage(err)}`);
|
|
@@ -828,6 +1405,7 @@ var DiscordVoiceManager$1 = class {
|
|
|
828
1405
|
connection.on(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
|
|
829
1406
|
player.on("error", playerErrorHandler);
|
|
830
1407
|
this.sessions.set(guildId, entry);
|
|
1408
|
+
logger.info(`discord voice: joined guild=${guildId} channel=${channelId} mode=${voiceMode} agent=${route.agentId} voiceSession=${voiceRoute.sessionKey} supervisorSession=${route.sessionKey} voiceModel=${voiceConfig?.model ?? "route-default"}`);
|
|
831
1409
|
return {
|
|
832
1410
|
ok: true,
|
|
833
1411
|
message: `Joined ${formatMention({ channelId })}.`,
|
|
@@ -892,8 +1470,26 @@ var DiscordVoiceManager$1 = class {
|
|
|
892
1470
|
}
|
|
893
1471
|
logVoiceVerbose(`capture start: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
894
1472
|
const voiceSdk = loadDiscordVoiceSdk();
|
|
1473
|
+
const voiceMode = resolveDiscordVoiceMode(this.params.discordConfig.voice);
|
|
1474
|
+
const realtime = entry.realtime && isDiscordRealtimeVoiceMode(voiceMode) ? entry.realtime : void 0;
|
|
1475
|
+
if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing && !realtime) {
|
|
1476
|
+
logVoiceVerbose(`capture ignored during playback: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
1477
|
+
return;
|
|
1478
|
+
}
|
|
1479
|
+
const realtimeIngress = realtime ? await this.resolveDiscordVoiceIngressContext(entry, userId) : void 0;
|
|
1480
|
+
if (realtime && !realtimeIngress) {
|
|
1481
|
+
logVoiceVerbose(`realtime capture unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
1482
|
+
return;
|
|
1483
|
+
}
|
|
1484
|
+
if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing && realtime) {
|
|
1485
|
+
if (!realtime.isBargeInEnabled()) {
|
|
1486
|
+
logger.info(`discord voice: realtime capture ignored during playback (barge-in disabled): guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
1487
|
+
return;
|
|
1488
|
+
}
|
|
1489
|
+
logVoiceVerbose(`realtime barge-in: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
1490
|
+
realtime.handleBargeIn();
|
|
1491
|
+
}
|
|
895
1492
|
this.enableDaveReceivePassthrough(entry, `speaker ${userId} start`, 15);
|
|
896
|
-
if (entry.player.state.status === voiceSdk.AudioPlayerStatus.Playing) entry.player.stop(true);
|
|
897
1493
|
const stream = entry.connection.receiver.subscribe(userId, { end: { behavior: voiceSdk.EndBehaviorType.Manual } });
|
|
898
1494
|
const generation = beginVoiceCapture(entry.capture, userId, stream);
|
|
899
1495
|
let streamAborted = false;
|
|
@@ -902,6 +1498,19 @@ var DiscordVoiceManager$1 = class {
|
|
|
902
1498
|
this.handleReceiveError(entry, err);
|
|
903
1499
|
});
|
|
904
1500
|
try {
|
|
1501
|
+
if (realtime && realtimeIngress) {
|
|
1502
|
+
const turn = realtime.beginSpeakerTurn(realtimeIngress, userId);
|
|
1503
|
+
try {
|
|
1504
|
+
await this.processRealtimeAudioCapture({
|
|
1505
|
+
entry,
|
|
1506
|
+
stream,
|
|
1507
|
+
turn
|
|
1508
|
+
});
|
|
1509
|
+
} finally {
|
|
1510
|
+
turn.close();
|
|
1511
|
+
}
|
|
1512
|
+
return;
|
|
1513
|
+
}
|
|
905
1514
|
const pcm = await decodeOpusStream(stream, {
|
|
906
1515
|
onVerbose: logVoiceVerbose,
|
|
907
1516
|
onWarn: (message) => logger.warn(message)
|
|
@@ -929,6 +1538,61 @@ var DiscordVoiceManager$1 = class {
|
|
|
929
1538
|
finishVoiceCapture(entry.capture, userId, generation);
|
|
930
1539
|
}
|
|
931
1540
|
}
|
|
1541
|
+
async processRealtimeAudioCapture(params) {
|
|
1542
|
+
const { entry, stream, turn } = params;
|
|
1543
|
+
let resetReceiveRecovery = false;
|
|
1544
|
+
await decodeOpusStreamChunks(stream, {
|
|
1545
|
+
onChunk: (pcm) => {
|
|
1546
|
+
if (!resetReceiveRecovery && pcm.length > 0) {
|
|
1547
|
+
resetReceiveRecovery = true;
|
|
1548
|
+
this.resetDecryptFailureState(entry);
|
|
1549
|
+
}
|
|
1550
|
+
turn.sendInputAudio(pcm);
|
|
1551
|
+
},
|
|
1552
|
+
onVerbose: logVoiceVerbose,
|
|
1553
|
+
onWarn: (message) => logger.warn(message)
|
|
1554
|
+
});
|
|
1555
|
+
}
|
|
1556
|
+
async resolveDiscordVoiceIngressContext(entry, userId) {
|
|
1557
|
+
return await resolveDiscordVoiceIngressContext({
|
|
1558
|
+
entry,
|
|
1559
|
+
userId,
|
|
1560
|
+
cfg: this.params.cfg,
|
|
1561
|
+
discordConfig: this.params.discordConfig,
|
|
1562
|
+
ownerAllowFrom: this.ownerAllowFrom,
|
|
1563
|
+
fetchGuildName: async (guildId) => {
|
|
1564
|
+
const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
|
|
1565
|
+
return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
|
|
1566
|
+
},
|
|
1567
|
+
speakerContext: this.speakerContext
|
|
1568
|
+
});
|
|
1569
|
+
}
|
|
1570
|
+
async runDiscordRealtimeAgentTurn(params) {
|
|
1571
|
+
const { context, entry, message, toolsAllow, userId } = params;
|
|
1572
|
+
logger.info(`discord voice: agent turn start guild=${entry.guildId} channel=${entry.channelId} voiceSession=${entry.voiceSessionKey} supervisorSession=${entry.route.sessionKey} agent=${entry.route.agentId} user=${userId} speaker=${context.speakerLabel} owner=${context.senderIsOwner} model=${this.params.discordConfig.voice?.model ?? "route-default"} message=${formatVoiceLogPreview(message)}`);
|
|
1573
|
+
const turn = await runDiscordVoiceAgentTurn({
|
|
1574
|
+
entry,
|
|
1575
|
+
userId,
|
|
1576
|
+
message,
|
|
1577
|
+
cfg: this.params.cfg,
|
|
1578
|
+
discordConfig: this.params.discordConfig,
|
|
1579
|
+
runtime: this.params.runtime,
|
|
1580
|
+
context,
|
|
1581
|
+
toolsAllow,
|
|
1582
|
+
ownerAllowFrom: this.ownerAllowFrom,
|
|
1583
|
+
fetchGuildName: async (guildId) => {
|
|
1584
|
+
const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
|
|
1585
|
+
return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;
|
|
1586
|
+
},
|
|
1587
|
+
speakerContext: this.speakerContext
|
|
1588
|
+
});
|
|
1589
|
+
if (!turn) {
|
|
1590
|
+
logVoiceVerbose(`realtime agent unauthorized: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
|
|
1591
|
+
return "";
|
|
1592
|
+
}
|
|
1593
|
+
logger.info(`discord voice: agent turn answer (${turn.text.length} chars) guild=${entry.guildId} channel=${entry.channelId} voiceSession=${entry.voiceSessionKey} supervisorSession=${entry.route.sessionKey} agent=${entry.route.agentId}: ${formatVoiceLogPreview(turn.text)}`);
|
|
1594
|
+
return turn.text;
|
|
1595
|
+
}
|
|
932
1596
|
async processSegment(params) {
|
|
933
1597
|
await processDiscordVoiceSegment({
|
|
934
1598
|
...params,
|
|
@@ -948,6 +1612,10 @@ var DiscordVoiceManager$1 = class {
|
|
|
948
1612
|
}
|
|
949
1613
|
handleReceiveError(entry, err) {
|
|
950
1614
|
const analysis = analyzeVoiceReceiveError(err);
|
|
1615
|
+
if (analysis.isAbortLike && !analysis.countsAsDecryptFailure) {
|
|
1616
|
+
logVoiceVerbose(`receive stream ended: ${analysis.message}`);
|
|
1617
|
+
return;
|
|
1618
|
+
}
|
|
951
1619
|
logger.warn(`discord voice: receive error: ${analysis.message}`);
|
|
952
1620
|
if (analysis.shouldAttemptPassthrough) this.enableDaveReceivePassthrough(entry, "receive decrypt error", 15);
|
|
953
1621
|
if (!analysis.countsAsDecryptFailure) return;
|