@openclaw/discord 2026.5.22 → 2026.5.24-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/action-runtime-api.js +1 -1
  2. package/dist/api.js +12 -12
  3. package/dist/{approval-handler.runtime-CyGGSiMH.js → approval-handler.runtime-DFRH9nZ4.js} +3 -3
  4. package/dist/{audit-DkBvCBsG.js → audit-D3I68CWK.js} +3 -3
  5. package/dist/{channel-LszCFESI.js → channel-DMgQbhnj.js} +24 -29
  6. package/dist/{channel-actions-DDwcJy6_.js → channel-actions-Cm0QHxsv.js} +2 -2
  7. package/dist/{channel-actions.runtime-CnwxnT58.js → channel-actions.runtime-BP_VU815.js} +5 -5
  8. package/dist/channel-config-api.js +1 -1
  9. package/dist/channel-plugin-api.js +1 -1
  10. package/dist/{channel.setup-rrsenw1h.js → channel.setup-Dh16wwV9.js} +3 -3
  11. package/dist/{components-CE63uOrc.js → components-BCxyMNaH.js} +1 -1
  12. package/dist/{config-schema-Kuf9IrLg.js → config-schema-DordNAyp.js} +8 -0
  13. package/dist/contract-api.js +2 -2
  14. package/dist/{conversation-identity-jwZ56ing.js → conversation-identity-CzE0g3tC.js} +2 -2
  15. package/dist/{directory-config-Do_NA_H8.js → directory-config-CkL6nnqZ.js} +1 -1
  16. package/dist/directory-contract-api.js +1 -1
  17. package/dist/{directory-live-DekuEEl8.js → directory-live-DZeXDKJ_.js} +1 -1
  18. package/dist/{doctor-DmPhrTLR.js → doctor-ymCgy8yc.js} +1 -1
  19. package/dist/{handle-action.guild-admin-09HC30Oh.js → handle-action.guild-admin-ozP4Jj--.js} +1 -1
  20. package/dist/index.js +3 -0
  21. package/dist/{manager.runtime-DPtx2pxB.js → manager.runtime-DfmiNzKh.js} +468 -76
  22. package/dist/meeting-notes-source-api.js +2 -0
  23. package/dist/meeting-notes-source-dVTfnnBi.js +123 -0
  24. package/dist/{message-handler-miSYjwxl.js → message-handler-ix5OBBs1.js} +6 -6
  25. package/dist/{message-handler.preflight-DiLKljGS.js → message-handler.preflight-BHR7pKU1.js} +10 -10
  26. package/dist/{message-handler.process-BKvQTmVP.js → message-handler.process-BheSnevd.js} +9 -9
  27. package/dist/{message-utils-U-ax3LTX.js → message-utils-CEaELqGw.js} +1 -1
  28. package/dist/{outbound-adapter-CF4KuNM6.js → outbound-adapter-BVTo8aVg.js} +6 -6
  29. package/dist/{pluralkit-sQLfeyIZ.js → pluralkit-BX81hJh4.js} +1 -1
  30. package/dist/{provider-Bv9Sx9lq.js → provider-CPLDWYC9.js} +26 -16
  31. package/dist/{provider-session.runtime-BbEz2CZt.js → provider-session.runtime-CwgJI6LP.js} +3 -3
  32. package/dist/provider.runtime-DnKAm3Kg.js +2 -0
  33. package/dist/{resolve-channels-BDjtLmjX.js → resolve-channels-BQl2WT8v.js} +1 -1
  34. package/dist/{resolve-users-BeaV5INm.js → resolve-users-CqW9vVcr.js} +1 -1
  35. package/dist/{runtime-DxQPyNY7.js → runtime-ZBAq7Trm.js} +7 -7
  36. package/dist/runtime-api.actions.js +2 -2
  37. package/dist/runtime-api.js +18 -18
  38. package/dist/runtime-api.lookup.js +4 -4
  39. package/dist/runtime-api.monitor-CN7g_Kp7.js +5 -0
  40. package/dist/runtime-api.monitor.js +4 -4
  41. package/dist/runtime-api.send.js +5 -5
  42. package/dist/runtime-api.threads.js +3 -3
  43. package/dist/{send-BjHCUrvn.js → send-CVjV4H6l.js} +3 -3
  44. package/dist/{send.components-BcCQ27av.js → send.components-BpnERIQ0.js} +4 -4
  45. package/dist/{send.outbound-CyvhKilL.js → send.outbound-CWhhKank.js} +3 -3
  46. package/dist/{send.receipt-spSPAC77.js → send.receipt--EtaMe11.js} +1 -1
  47. package/dist/{send.shared-C3uAu6bc.js → send.shared-CsGHs6ft.js} +2 -2
  48. package/dist/setup-plugin-api.js +1 -1
  49. package/dist/{shared-CgAcqTsg.js → shared-DFP_4nlg.js} +2 -2
  50. package/dist/{subagent-hooks-DehrqnoK.js → subagent-hooks-BLRmOFEa.js} +2 -2
  51. package/dist/subagent-hooks-api.js +1 -1
  52. package/dist/{system-events-HgHeiMHg.js → system-events-sMHIu6aa.js} +1 -1
  53. package/dist/{target-resolver-squYahVm.js → target-resolver-QZP1tC8H.js} +3 -3
  54. package/dist/targets-DRvCCf1i.js +3 -0
  55. package/dist/test-api.js +3 -3
  56. package/dist/{thread-bindings-BD9jKFo5.js → thread-bindings-DeBPNv4D.js} +4 -4
  57. package/dist/{thread-bindings.discord-api-D2EF_9XS.js → thread-bindings.discord-api-CaiGJbEP.js} +4 -4
  58. package/dist/{thread-bindings.manager-AEGKdE3O.js → thread-bindings.manager-ANA-Hk_y.js} +3 -3
  59. package/dist/{typing-DUUjLsPr.js → typing-tLlUYdt6.js} +1 -1
  60. package/npm-shrinkwrap.json +3 -3
  61. package/openclaw.plugin.json +33 -0
  62. package/package.json +4 -4
  63. package/dist/provider.runtime-DKBGMsC7.js +0 -2
  64. package/dist/runtime-api.monitor-Wt6Qf0SL.js +0 -5
  65. package/dist/targets-BeetkGO0.js +0 -3
@@ -1,9 +1,9 @@
1
- import { Ht as parseDiscordTarget, _ as VoiceStateUpdateListener, c as discord_exports, h as ResumedListener, jt as getGuildVoiceState, m as ReadyListener } from "./send.receipt-spSPAC77.js";
1
+ import { Ht as parseDiscordTarget, _ as VoiceStateUpdateListener, c as discord_exports, h as ResumedListener, jt as getGuildVoiceState, m as ReadyListener } from "./send.receipt--EtaMe11.js";
2
2
  import { c as resolveDiscordAccountAllowFrom } from "./accounts-dXTfmnSZ.js";
3
3
  import { a as normalizeDiscordSlug, b as formatDiscordUserTag, m as resolveDiscordOwnerAccess } from "./allow-list-BnkWtVpA.js";
4
- import { i as formatMention } from "./send.outbound-CyvhKilL.js";
4
+ import { i as formatMention } from "./send.outbound-CWhhKank.js";
5
5
  import { t as getDiscordRuntime } from "./runtime-DgnVQ7zW.js";
6
- import { o as authorizeDiscordVoiceIngress, u as resolveDiscordVoiceEnabled } from "./provider-Bv9Sx9lq.js";
6
+ import { o as authorizeDiscordVoiceIngress, u as resolveDiscordVoiceEnabled } from "./provider-CPLDWYC9.js";
7
7
  import { t as buildDiscordGroupSystemPrompt } from "./inbound-context-B5EsqsSr.js";
8
8
  import { createRequire } from "node:module";
9
9
  import { normalizeOptionalString } from "openclaw/plugin-sdk/string-coerce-runtime";
@@ -16,7 +16,7 @@ import { resolvePreferredOpenClawTmpDir, tempWorkspace } from "openclaw/plugin-s
16
16
  import { formatErrorMessage } from "openclaw/plugin-sdk/ssrf-runtime";
17
17
  import { agentCommandFromIngress, getTtsProvider, resolveAgentDir, resolveTtsConfig, resolveTtsPrefsPath } from "openclaw/plugin-sdk/agent-runtime";
18
18
  import { escapeRegExp } from "openclaw/plugin-sdk/text-utility-runtime";
19
- import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ, buildRealtimeVoiceAgentConsultChatMessage, buildRealtimeVoiceAgentConsultPolicyInstructions, buildRealtimeVoiceAgentConsultWorkingResponse, createRealtimeVoiceAgentTalkbackQueue, createRealtimeVoiceBridgeSession, resamplePcm, resolveConfiguredRealtimeVoiceProvider, resolveRealtimeVoiceAgentConsultToolPolicy, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow } from "openclaw/plugin-sdk/realtime-voice";
19
+ import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, REALTIME_VOICE_AGENT_CONTROL_TOOL, REALTIME_VOICE_AGENT_CONTROL_TOOL_NAME, REALTIME_VOICE_AUDIO_FORMAT_PCM16_24KHZ, buildRealtimeVoiceAgentConsultChatMessage, buildRealtimeVoiceAgentConsultPolicyInstructions, controlRealtimeVoiceAgentRun, createRealtimeVoiceAgentTalkbackQueue, createRealtimeVoiceBridgeSession, parseRealtimeVoiceAgentControlToolArgs, resamplePcm, resolveConfiguredRealtimeVoiceProvider, resolveRealtimeVoiceAgentConsultToolPolicy, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow, shouldAutoControlRealtimeVoiceAgentText } from "openclaw/plugin-sdk/realtime-voice";
20
20
  import { resolveRealtimeBootstrapContextInstructions } from "openclaw/plugin-sdk/realtime-bootstrap-context";
21
21
  import { PassThrough, Readable } from "node:stream";
22
22
  import { parseTtsDirectives } from "openclaw/plugin-sdk/speech";
@@ -341,6 +341,24 @@ async function resolveDiscordVoiceRealtimeBootstrapContext(params) {
341
341
  }
342
342
  }
343
343
  //#endregion
344
+ //#region extensions/discord/src/voice/agent-control.ts
345
+ async function maybeControlDiscordVoiceAgentRun(params) {
346
+ if (!shouldAutoControlRealtimeVoiceAgentText(params.text)) return { handled: false };
347
+ const result = await controlRealtimeVoiceAgentRun({
348
+ sessionKey: params.entry.route.sessionKey,
349
+ text: params.text
350
+ });
351
+ if (!result.active) return {
352
+ handled: false,
353
+ result
354
+ };
355
+ return {
356
+ handled: true,
357
+ result,
358
+ ...result.speak && !result.suppress ? { speakText: result.message } : {}
359
+ };
360
+ }
361
+ //#endregion
344
362
  //#region extensions/discord/src/voice/prompt.ts
345
363
  const DISCORD_VOICE_SPOKEN_OUTPUT_CONTRACT = [
346
364
  "You are OpenClaw's Discord voice interface in a live voice channel.",
@@ -385,6 +403,11 @@ function isVoiceChannel(type) {
385
403
  //#endregion
386
404
  //#region extensions/discord/src/voice/realtime.ts
387
405
  const logger$2 = createSubsystemLogger("discord/voice");
406
+ function resolveDiscordRealtimeVoiceAgentConsultTools(policy) {
407
+ const tools = resolveRealtimeVoiceAgentConsultTools(policy);
408
+ if (policy !== "none" && !tools.some((tool) => tool.name === REALTIME_VOICE_AGENT_CONTROL_TOOL.name)) return [...tools, REALTIME_VOICE_AGENT_CONTROL_TOOL];
409
+ return tools;
410
+ }
388
411
  const DISCORD_REALTIME_TALKBACK_DEBOUNCE_MS = 350;
389
412
  const DISCORD_REALTIME_FALLBACK_TEXT = "I hit an error while checking that. Please try again.";
390
413
  const DISCORD_REALTIME_PENDING_SPEAKER_CONTEXT_LIMIT = 32;
@@ -394,6 +417,8 @@ const DISCORD_REALTIME_LOG_PREVIEW_CHARS = 500;
394
417
  const DISCORD_REALTIME_DEFAULT_MIN_BARGE_IN_AUDIO_END_MS = 250;
395
418
  const DISCORD_REALTIME_FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
396
419
  const DISCORD_REALTIME_DUPLICATE_ERROR_SUPPRESS_MS = 6e4;
420
+ const DISCORD_REALTIME_CONTROL_SPEECH_DEDUPE_MS = 5e3;
421
+ const DISCORD_REALTIME_OUTPUT_PLAYBACK_WATCHDOG_MARGIN_MS = 1500;
397
422
  const REALTIME_PCM16_BYTES_PER_SAMPLE = 2;
398
423
  const DISCORD_RAW_PCM_FRAME_BYTES = 3840;
399
424
  const DISCORD_REALTIME_OUTPUT_PREROLL_FRAMES = 25;
@@ -422,6 +447,7 @@ const DISCORD_REALTIME_FORCED_CONSULT_TRAILING_FRAGMENT_WORDS = new Set([
422
447
  "to",
423
448
  "with"
424
449
  ]);
450
+ const DISCORD_REALTIME_FORCED_CONSULT_REASON = "provider_final_transcript_without_openclaw_agent_consult";
425
451
  const DISCORD_REALTIME_VERBOSE_OMITTED_EVENTS = new Set([
426
452
  "conversation.output_audio.delta",
427
453
  "input_audio_buffer.append",
@@ -540,6 +566,39 @@ function extractDiscordExactSpeechConsultText(args) {
540
566
  function normalizeRealtimeConsultMatchText(text) {
541
567
  return text.toLowerCase().replace(/\s+/g, " ").trim();
542
568
  }
569
+ function normalizeControlSpeechText(text) {
570
+ return text.toLowerCase().replace(/\s+/g, " ").trim();
571
+ }
572
+ function normalizeWakeName(value) {
573
+ return value.toLowerCase().replace(/\s+/g, " ").trim() || void 0;
574
+ }
575
+ function escapeRegExp$1(value) {
576
+ return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
577
+ }
578
+ function includesWakeName(text, wakeName) {
579
+ const normalizedText = normalizeRealtimeConsultMatchText(text);
580
+ const normalizedName = normalizeWakeName(wakeName);
581
+ if (!normalizedName) return false;
582
+ return new RegExp(`(^|[^a-z0-9])${escapeRegExp$1(normalizedName)}([^a-z0-9]|$)`).test(normalizedText);
583
+ }
584
+ function stripLeadingWakeName(text, wakeName) {
585
+ const normalizedName = normalizeWakeName(wakeName);
586
+ if (!normalizedName) return text.trim();
587
+ const wakePattern = normalizedName.split(" ").map(escapeRegExp$1).join("\\s+");
588
+ return text.replace(new RegExp(`^\\s*(?:(?:hey|ok|okay)(?:\\s*[-,:;]+\\s*|\\s+))?${wakePattern}(?:\\s*[-,:;]+\\s*|\\s+)`, "i"), "").trim();
589
+ }
590
+ function resolveDiscordRealtimeWakeNames(params) {
591
+ const configured = params.config?.wakeNames?.map((name) => normalizeWakeName(name)).filter((name) => Boolean(name));
592
+ if (configured && configured.length > 0) return sortWakeNames(Array.from(new Set(configured)));
593
+ const agent = params.cfg.agents?.list?.find((candidate) => candidate.id === params.agentId);
594
+ const configuredAgentNames = [agent?.name, agent?.identity?.name].map((name) => typeof name === "string" ? normalizeWakeName(name) : void 0).filter((name) => Boolean(name));
595
+ const productWakeNames = [normalizeWakeName("OpenClaw")].filter((name) => Boolean(name));
596
+ const defaults = configuredAgentNames.length > 0 ? [...configuredAgentNames, ...productWakeNames] : [normalizeWakeName(params.agentId), ...productWakeNames].filter((name) => Boolean(name));
597
+ return sortWakeNames(Array.from(new Set(defaults)));
598
+ }
599
+ function sortWakeNames(wakeNames) {
600
+ return wakeNames.toSorted((left, right) => right.length - left.length || left.localeCompare(right));
601
+ }
543
602
  function matchesPendingAgentProxyQuestion(consultMessage, question) {
544
603
  const normalizedConsult = normalizeRealtimeConsultMatchText(consultMessage);
545
604
  const normalizedQuestion = normalizeRealtimeConsultMatchText(question);
@@ -554,6 +613,8 @@ var DiscordRealtimeVoiceSession = class {
554
613
  this.stopped = false;
555
614
  this.consultToolPolicy = "safe-read-only";
556
615
  this.consultPolicy = "auto";
616
+ this.requireWakeName = false;
617
+ this.wakeNames = [];
557
618
  this.pendingAgentProxyConsultContexts = [];
558
619
  this.recentAgentProxyConsultContexts = [];
559
620
  this.pendingSpeakerTurns = [];
@@ -605,9 +666,16 @@ var DiscordRealtimeVoiceSession = class {
605
666
  this.consultToolsAllow = resolveRealtimeVoiceAgentConsultToolsAllow(toolPolicy);
606
667
  const consultPolicy = this.realtimeConfig?.consultPolicy ?? (isAgentProxy ? "always" : "auto");
607
668
  this.consultPolicy = consultPolicy;
669
+ const supportsWakeNameGate = resolved.provider.id === "openai";
670
+ this.requireWakeName = this.realtimeConfig?.requireWakeName === true && isAgentProxy && supportsWakeNameGate;
671
+ this.wakeNames = this.requireWakeName ? resolveDiscordRealtimeWakeNames({
672
+ config: this.realtimeConfig,
673
+ cfg: this.params.cfg,
674
+ agentId: this.params.entry.route.agentId
675
+ }) : [];
608
676
  const usesRealtimeAgentHandoff = this.params.mode === "bidi" || toolPolicy !== "none";
609
- const autoRespondToAudio = !isAgentProxy || consultPolicy !== "always";
610
- const interruptResponseOnInputAudio = resolveDiscordRealtimeInterruptResponseOnInputAudio({
677
+ const autoRespondToAudio = !this.requireWakeName && (!isAgentProxy || consultPolicy !== "always");
678
+ const interruptResponseOnInputAudio = !this.requireWakeName && resolveDiscordRealtimeInterruptResponseOnInputAudio({
611
679
  realtimeConfig: this.realtimeConfig,
612
680
  providerId: resolved.provider.id
613
681
  });
@@ -626,7 +694,7 @@ var DiscordRealtimeVoiceSession = class {
626
694
  autoRespondToAudio,
627
695
  interruptResponseOnInputAudio,
628
696
  markStrategy: "ack-immediately",
629
- tools: usesRealtimeAgentHandoff ? resolveRealtimeVoiceAgentConsultTools(toolPolicy) : [],
697
+ tools: usesRealtimeAgentHandoff ? resolveDiscordRealtimeVoiceAgentConsultTools(toolPolicy) : [],
630
698
  audioSink: {
631
699
  isOpen: () => !this.stopped,
632
700
  sendAudio: (audio) => this.sendOutputAudio(audio),
@@ -634,12 +702,9 @@ var DiscordRealtimeVoiceSession = class {
634
702
  },
635
703
  onTranscript: (role, text, isFinal) => {
636
704
  if (isFinal && text.trim()) logger$2.info(`discord voice: realtime ${role} transcript (${text.length} chars): ${formatRealtimeLogPreview(text)}`);
637
- if (!isFinal || role !== "user" || !isDiscordAgentProxyVoiceMode(this.params.mode)) return;
638
- if (usesRealtimeAgentHandoff) {
639
- this.scheduleForcedAgentProxyConsult(text);
640
- return;
641
- }
642
- this.talkback.enqueue(text, this.consumePendingSpeakerContext());
705
+ if (isFinal && role === "assistant") this.suppressDuplicateControlSpeech(text);
706
+ if (!isFinal || role !== "user") return;
707
+ this.handleFinalUserTranscript(text, { usesRealtimeAgentHandoff });
643
708
  },
644
709
  onToolCall: (event, session) => this.handleToolCall(event, session),
645
710
  onEvent: (event) => {
@@ -660,7 +725,7 @@ var DiscordRealtimeVoiceSession = class {
660
725
  });
661
726
  const resolvedModel = readProviderConfigString(resolved.providerConfig, "model") ?? resolved.provider.defaultModel;
662
727
  const resolvedVoice = readProviderConfigString(resolved.providerConfig, "voice");
663
- logger$2.info(`discord voice: realtime bridge starting mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"} consultPolicy=${consultPolicy} toolPolicy=${toolPolicy} autoRespond=${autoRespondToAudio} interruptResponse=${interruptResponseOnInputAudio} bargeIn=${resolveDiscordRealtimeBargeIn({
728
+ logger$2.info(`discord voice: realtime bridge starting mode=${this.params.mode} provider=${resolved.provider.id} model=${resolvedModel ?? "default"} voice=${resolvedVoice ?? "default"} consultPolicy=${consultPolicy} toolPolicy=${toolPolicy} autoRespond=${autoRespondToAudio} requireWakeName=${this.requireWakeName} wakeNames=${this.wakeNames.join(",") || "none"} interruptResponse=${interruptResponseOnInputAudio} bargeIn=${resolveDiscordRealtimeBargeIn({
664
729
  realtimeConfig: this.realtimeConfig,
665
730
  providerId: resolved.provider.id
666
731
  })} minBargeInAudioEndMs=${resolveDiscordRealtimeMinBargeInAudioEndMs(this.realtimeConfig)}`);
@@ -766,6 +831,7 @@ var DiscordRealtimeVoiceSession = class {
766
831
  this.bridge?.handleBargeIn({ audioPlaybackActive: true });
767
832
  }
768
833
  isBargeInEnabled() {
834
+ if (this.requireWakeName) return false;
769
835
  const providerId = this.realtimeProviderId ?? this.realtimeConfig?.provider ?? "openai";
770
836
  return resolveDiscordRealtimeBargeIn({
771
837
  realtimeConfig: this.realtimeConfig,
@@ -839,6 +905,7 @@ var DiscordRealtimeVoiceSession = class {
839
905
  }
840
906
  resetOutputStream(reason = "reset") {
841
907
  const stream = this.outputStream;
908
+ this.clearOutputPlaybackWatchdog();
842
909
  this.logOutputAudioStopped(reason);
843
910
  this.outputStream = null;
844
911
  this.outputPacedBuffer = Buffer.alloc(0);
@@ -852,8 +919,10 @@ var DiscordRealtimeVoiceSession = class {
852
919
  if (!stream || stream.destroyed || this.outputStreamEnding) return;
853
920
  this.outputStreamEnding = true;
854
921
  logger$2.info(`discord voice: realtime audio playback finishing reason=${reason} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} audioMs=${Math.floor(this.outputAudioTimestampMs)} chunks=${this.outputAudioChunks}`);
855
- if (playBuffered) this.startOutputPlayback(stream);
856
- else {
922
+ if (playBuffered) {
923
+ this.startOutputPlayback(stream);
924
+ this.scheduleOutputPlaybackWatchdog(reason, stream);
925
+ } else {
857
926
  this.resetOutputStream(reason);
858
927
  this.params.entry.player.stop(true);
859
928
  this.completeExactSpeechResponse(reason);
@@ -861,6 +930,28 @@ var DiscordRealtimeVoiceSession = class {
861
930
  }
862
931
  stream.end();
863
932
  }
933
+ scheduleOutputPlaybackWatchdog(reason, stream) {
934
+ this.clearOutputPlaybackWatchdog();
935
+ if (!this.outputAudioStartedAt || this.outputAudioTimestampMs <= 0) return;
936
+ const elapsedMs = Date.now() - this.outputAudioStartedAt;
937
+ const timeoutMs = Math.max(1e3, this.outputAudioTimestampMs - elapsedMs + DISCORD_REALTIME_OUTPUT_PLAYBACK_WATCHDOG_MARGIN_MS);
938
+ this.outputPlaybackWatchdog = setTimeout(() => {
939
+ this.outputPlaybackWatchdog = void 0;
940
+ if (this.outputStream && this.outputStream !== stream) return;
941
+ if (!this.outputStream && !this.isOutputAudioActive()) {
942
+ this.completeExactSpeechResponse("playback-watchdog");
943
+ return;
944
+ }
945
+ logger$2.warn(`discord voice: realtime audio playback watchdog fired reason=${reason} guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} audioMs=${Math.floor(this.outputAudioTimestampMs)} elapsedMs=${this.outputAudioStartedAt ? Date.now() - this.outputAudioStartedAt : 0}`);
946
+ this.clearOutputAudio("playback-watchdog");
947
+ this.completeExactSpeechResponse("playback-watchdog");
948
+ }, timeoutMs);
949
+ }
950
+ clearOutputPlaybackWatchdog() {
951
+ if (!this.outputPlaybackWatchdog) return;
952
+ clearTimeout(this.outputPlaybackWatchdog);
953
+ this.outputPlaybackWatchdog = void 0;
954
+ }
864
955
  enqueueExactSpeechMessage(text) {
865
956
  if (this.stopped || !text.trim()) return;
866
957
  if (this.exactSpeechResponseActive || this.hasInterruptibleOutputAudio()) {
@@ -876,6 +967,40 @@ var DiscordRealtimeVoiceSession = class {
876
967
  this.exactSpeechAudioStarted = false;
877
968
  this.bridge?.sendUserMessage(buildDiscordSpeakExactUserMessage(text));
878
969
  }
970
+ speakControlResult(text) {
971
+ const trimmed = text.trim();
972
+ if (this.stopped || !trimmed) return;
973
+ this.queuedExactSpeechMessages = [];
974
+ this.completeExactSpeechResponse("active-run-control", { drain: false });
975
+ this.bridge?.handleBargeIn?.({
976
+ audioPlaybackActive: true,
977
+ force: true
978
+ });
979
+ this.clearOutputAudio("active-run-control");
980
+ this.lastControlSpeech = {
981
+ normalizedText: normalizeControlSpeechText(trimmed),
982
+ sentAt: Date.now(),
983
+ assistantTranscriptCount: 0
984
+ };
985
+ this.sendExactSpeechMessage(trimmed);
986
+ }
987
+ suppressDuplicateControlSpeech(text) {
988
+ const recent = this.lastControlSpeech;
989
+ if (!recent) return;
990
+ if (Date.now() - recent.sentAt > DISCORD_REALTIME_CONTROL_SPEECH_DEDUPE_MS) {
991
+ this.lastControlSpeech = void 0;
992
+ return;
993
+ }
994
+ if (normalizeControlSpeechText(text) !== recent.normalizedText) return;
995
+ recent.assistantTranscriptCount += 1;
996
+ if (recent.assistantTranscriptCount <= 1) return;
997
+ logger$2.info(`discord voice: realtime duplicate active-run control speech suppressed guild=${this.params.entry.guildId} channel=${this.params.entry.channelId}`);
998
+ this.bridge?.handleBargeIn?.({
999
+ audioPlaybackActive: true,
1000
+ force: true
1001
+ });
1002
+ this.clearOutputAudio("duplicate-active-run-control");
1003
+ }
879
1004
  completeExactSpeechResponse(reason, options) {
880
1005
  if (!this.exactSpeechResponseActive && this.queuedExactSpeechMessages.length === 0) return;
881
1006
  this.exactSpeechResponseActive = false;
@@ -932,6 +1057,10 @@ var DiscordRealtimeVoiceSession = class {
932
1057
  }
933
1058
  handleToolCall(event, session) {
934
1059
  const callId = event.callId || event.itemId || "unknown";
1060
+ if (event.name === REALTIME_VOICE_AGENT_CONTROL_TOOL_NAME) {
1061
+ this.handleAgentControlToolCall(event, session, callId);
1062
+ return;
1063
+ }
935
1064
  if (event.name !== REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
936
1065
  session.submitToolResult(callId, { error: `Tool "${event.name}" not available` });
937
1066
  return;
@@ -948,7 +1077,6 @@ var DiscordRealtimeVoiceSession = class {
948
1077
  }
949
1078
  const consultMessage = buildRealtimeVoiceAgentConsultChatMessage(event.args);
950
1079
  logger$2.info(`discord voice: realtime consult requested call=${callId || "unknown"} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} question=${formatRealtimeLogPreview(consultMessage)}`);
951
- if (session.bridge.supportsToolResultContinuation) session.submitToolResult(callId, buildRealtimeVoiceAgentConsultWorkingResponse("speaker"), { willContinue: true });
952
1080
  const pendingConsultContext = this.consumeAgentProxyConsultContext(consultMessage);
953
1081
  if (pendingConsultContext) this.addRecentAgentProxyConsultQuestion(pendingConsultContext.recent, consultMessage);
954
1082
  let context = pendingConsultContext?.context;
@@ -986,6 +1114,20 @@ var DiscordRealtimeVoiceSession = class {
986
1114
  session.submitToolResult(callId, { error: formatErrorMessage(error) });
987
1115
  });
988
1116
  }
1117
+ async handleAgentControlToolCall(event, session, callId) {
1118
+ try {
1119
+ const parsed = parseRealtimeVoiceAgentControlToolArgs(event.args);
1120
+ const result = await controlRealtimeVoiceAgentRun({
1121
+ sessionKey: this.params.entry.route.sessionKey,
1122
+ text: parsed.text,
1123
+ mode: parsed.mode
1124
+ });
1125
+ this.logAgentControlResult(result);
1126
+ session.submitToolResult(callId, result);
1127
+ } catch (error) {
1128
+ session.submitToolResult(callId, { error: formatErrorMessage(error) });
1129
+ }
1130
+ }
989
1131
  async runAgentTurn(params) {
990
1132
  const context = params.context;
991
1133
  if (!context) return "";
@@ -996,8 +1138,86 @@ var DiscordRealtimeVoiceSession = class {
996
1138
  userId: context.userId
997
1139
  });
998
1140
  }
999
- scheduleForcedAgentProxyConsult(transcript) {
1000
- if (this.consultPolicy !== "always") return;
1141
+ async handleFinalUserTranscript(text, params) {
1142
+ const trimmed = text.trim();
1143
+ if (!trimmed) return;
1144
+ const meetingNotesTurn = this.peekPendingSpeakerTurn();
1145
+ this.recordMeetingNotesUtterance(trimmed, meetingNotesTurn);
1146
+ const wakeNameResult = this.resolveWakeNameTranscript(trimmed);
1147
+ if (!wakeNameResult.allowed) {
1148
+ logger$2.info(`discord voice: realtime wake-name gate ignored transcript chars=${trimmed.length} voiceSession=${this.params.entry.voiceSessionKey} agent=${this.params.entry.route.agentId} wakeNames=${this.wakeNames.join(",") || "none"}`);
1149
+ this.consumePendingSpeakerContext();
1150
+ return;
1151
+ }
1152
+ const acceptedText = wakeNameResult.text || trimmed;
1153
+ const usesAgentProxy = isDiscordAgentProxyVoiceMode(this.params.mode);
1154
+ const pendingForcedConsult = usesAgentProxy && params.usesRealtimeAgentHandoff ? this.prepareForcedAgentProxyConsult(acceptedText) : void 0;
1155
+ const control = await maybeControlDiscordVoiceAgentRun({
1156
+ entry: this.params.entry,
1157
+ text: acceptedText
1158
+ }).catch((error) => {
1159
+ logger$2.warn(`discord voice: realtime active-run control failed; falling back to normal transcript handling: ${formatErrorMessage(error)}`);
1160
+ });
1161
+ if (control?.handled) {
1162
+ if (pendingForcedConsult) {
1163
+ this.removePendingAgentProxyConsultContext(pendingForcedConsult);
1164
+ this.forgetRecentAgentProxyConsultContext(pendingForcedConsult.recent);
1165
+ }
1166
+ this.logAgentControlResult(control.result);
1167
+ if (control.speakText) this.speakControlResult(control.speakText);
1168
+ return;
1169
+ }
1170
+ if (!usesAgentProxy) return;
1171
+ if (params.usesRealtimeAgentHandoff) {
1172
+ if (pendingForcedConsult) this.schedulePreparedForcedAgentProxyConsult(pendingForcedConsult);
1173
+ return;
1174
+ }
1175
+ this.talkback.enqueue(acceptedText, this.consumePendingSpeakerContext());
1176
+ }
1177
+ resolveWakeNameTranscript(text) {
1178
+ if (!this.requireWakeName) return {
1179
+ allowed: true,
1180
+ text
1181
+ };
1182
+ const wakeName = this.wakeNames.find((name) => includesWakeName(text, name));
1183
+ if (wakeName) return {
1184
+ allowed: true,
1185
+ text: stripLeadingWakeName(text, wakeName)
1186
+ };
1187
+ return {
1188
+ allowed: false,
1189
+ text
1190
+ };
1191
+ }
1192
+ recordMeetingNotesUtterance(text, turn) {
1193
+ const meetingNotes = this.params.entry.meetingNotes;
1194
+ if (!meetingNotes || !turn) return;
1195
+ const context = turn.context;
1196
+ const utterance = {
1197
+ sessionId: meetingNotes.sessionId,
1198
+ startedAt: new Date(turn.startedAt).toISOString(),
1199
+ final: true,
1200
+ speaker: {
1201
+ id: context.userId,
1202
+ label: context.speakerLabel
1203
+ },
1204
+ text,
1205
+ metadata: {
1206
+ channel: "discord",
1207
+ guildId: this.params.entry.guildId,
1208
+ channelId: this.params.entry.channelId,
1209
+ voiceSessionKey: this.params.entry.voiceSessionKey
1210
+ }
1211
+ };
1212
+ Promise.resolve().then(() => meetingNotes.onUtterance(utterance)).catch((error) => {
1213
+ logger$2.warn(`discord voice: realtime meeting notes utterance failed: ${formatErrorMessage(error)}`);
1214
+ });
1215
+ }
1216
+ logAgentControlResult(result) {
1217
+ logger$2.info(`discord voice: realtime active-run control handled mode=${result.mode} ok=${result.ok} active=${result.active} reason=${result.reason ?? "none"} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId}`);
1218
+ }
1219
+ prepareForcedAgentProxyConsult(transcript) {
1220
+ if (this.consultPolicy !== "always" && !this.requireWakeName) return;
1001
1221
  const question = transcript.trim();
1002
1222
  if (!question) return;
1003
1223
  const context = this.consumePendingSpeakerContext();
@@ -1021,6 +1241,10 @@ var DiscordRealtimeVoiceSession = class {
1021
1241
  recent: this.rememberRecentAgentProxyConsultContext(question, context)
1022
1242
  };
1023
1243
  this.pendingAgentProxyConsultContexts.push(pending);
1244
+ return pending;
1245
+ }
1246
+ schedulePreparedForcedAgentProxyConsult(pending) {
1247
+ if (!this.pendingAgentProxyConsultContexts.includes(pending) || pending.timer) return;
1024
1248
  pending.timer = setTimeout(() => {
1025
1249
  pending.timer = void 0;
1026
1250
  this.runForcedAgentProxyConsult(pending);
@@ -1052,18 +1276,23 @@ var DiscordRealtimeVoiceSession = class {
1052
1276
  const index = this.pendingAgentProxyConsultContexts.indexOf(pending);
1053
1277
  if (index >= 0) this.pendingAgentProxyConsultContexts.splice(index, 1);
1054
1278
  }
1279
+ forgetRecentAgentProxyConsultContext(recent) {
1280
+ const index = this.recentAgentProxyConsultContexts.indexOf(recent);
1281
+ if (index >= 0) this.recentAgentProxyConsultContexts.splice(index, 1);
1282
+ }
1055
1283
  async runForcedAgentProxyConsult(pending) {
1056
1284
  this.removePendingAgentProxyConsultContext(pending);
1057
1285
  const { context, question } = pending;
1058
1286
  if (this.stopped) return;
1059
1287
  const startedAt = Date.now();
1060
1288
  logger$2.info(`discord voice: realtime forced agent consult starting chars=${question.length} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel} owner=${context.senderIsOwner}`);
1289
+ logger$2.debug(`discord voice: realtime forced agent consult reason=${DISCORD_REALTIME_FORCED_CONSULT_REASON} consultPolicy=${this.consultPolicy} requireWakeName=${this.requireWakeName} voiceSession=${this.params.entry.voiceSessionKey} supervisorSession=${this.params.entry.route.sessionKey} agent=${this.params.entry.route.agentId} speaker=${context.speakerLabel}`);
1061
1290
  if (this.hasInterruptibleOutputAudio()) logger$2.info(`discord voice: realtime forced agent consult preserving active playback guild=${this.params.entry.guildId} channel=${this.params.entry.channelId} outputAudioMs=${Math.floor(this.outputAudioTimestampMs)} outputActive=${this.isOutputAudioActive()} playbackChunks=${this.outputAudioChunks}`);
1062
1291
  pending.recent.handledByForcedPlayback = true;
1063
1292
  try {
1064
1293
  const promise = this.runAgentTurn({
1065
1294
  context,
1066
- message: [question, "Context: The realtime model produced a final user transcript without calling openclaw_agent_consult. OpenClaw is forcing the consult because consultPolicy is always."].join("\n\n")
1295
+ message: question
1067
1296
  });
1068
1297
  this.setRecentAgentProxyConsultPromise(pending.recent, promise);
1069
1298
  const text = await promise;
@@ -1083,6 +1312,11 @@ var DiscordRealtimeVoiceSession = class {
1083
1312
  this.prunePendingSpeakerTurns();
1084
1313
  return turn?.context;
1085
1314
  }
1315
+ peekPendingSpeakerTurn() {
1316
+ this.prunePendingSpeakerTurns();
1317
+ this.expireClosedSpeakerTurnsBeforeLaterAudio();
1318
+ return this.pendingSpeakerTurns.find((turn) => turn.hasAudio);
1319
+ }
1086
1320
  hasPendingSpeakerAudioContext() {
1087
1321
  this.prunePendingSpeakerTurns();
1088
1322
  this.expireClosedSpeakerTurnsBeforeLaterAudio();
@@ -1449,23 +1683,54 @@ async function processDiscordVoiceSegment(params) {
1449
1683
  }
1450
1684
  logVoiceVerbose(`transcription ok (${transcript.length} chars): guild ${entry.guildId} channel ${entry.channelId}`);
1451
1685
  logVoiceVerbose(`transcript from ${ingress.speakerLabel} (${userId}) in guild ${entry.guildId} channel ${entry.channelId}: ${formatVoiceTranscriptLogPreview(transcript)}`);
1452
- const turn = await runDiscordVoiceAgentTurn({
1686
+ if (params.meetingNotes) {
1687
+ await params.meetingNotes.onUtterance({
1688
+ sessionId: params.meetingNotes.sessionId,
1689
+ startedAt: (/* @__PURE__ */ new Date()).toISOString(),
1690
+ final: true,
1691
+ speaker: {
1692
+ id: userId,
1693
+ label: ingress.speakerLabel
1694
+ },
1695
+ text: transcript,
1696
+ metadata: {
1697
+ channel: "discord",
1698
+ guildId: entry.guildId,
1699
+ channelId: entry.channelId,
1700
+ voiceSessionKey: entry.voiceSessionKey
1701
+ }
1702
+ });
1703
+ return;
1704
+ }
1705
+ let replyText;
1706
+ const control = await maybeControlDiscordVoiceAgentRun({
1453
1707
  entry,
1454
- userId,
1455
- message: formatVoiceIngressPrompt(transcript, ingress.speakerLabel),
1456
- cfg: params.cfg,
1457
- discordConfig: params.discordConfig,
1458
- runtime: params.runtime,
1459
- context: ingress,
1460
- ownerAllowFrom: params.ownerAllowFrom,
1461
- fetchGuildName: params.fetchGuildName,
1462
- speakerContext: params.speakerContext
1708
+ text: transcript
1709
+ }).catch((error) => {
1710
+ logger$1.warn(`discord voice: active-run control failed; falling back to normal segment handling: ${formatErrorMessage(error)}`);
1463
1711
  });
1464
- if (!turn) {
1465
- logVoiceVerbose(`segment unauthorized before agent turn: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1466
- return;
1712
+ if (control?.handled) {
1713
+ logger$1.info(`discord voice: active-run control handled mode=${control.result.mode} ok=${control.result.ok} active=${control.result.active} reason=${control.result.reason ?? "none"} session=${entry.route.sessionKey}`);
1714
+ replyText = control.speakText ?? "";
1715
+ } else {
1716
+ const turn = await runDiscordVoiceAgentTurn({
1717
+ entry,
1718
+ userId,
1719
+ message: formatVoiceIngressPrompt(transcript, ingress.speakerLabel),
1720
+ cfg: params.cfg,
1721
+ discordConfig: params.discordConfig,
1722
+ runtime: params.runtime,
1723
+ context: ingress,
1724
+ ownerAllowFrom: params.ownerAllowFrom,
1725
+ fetchGuildName: params.fetchGuildName,
1726
+ speakerContext: params.speakerContext
1727
+ });
1728
+ if (!turn) {
1729
+ logVoiceVerbose(`segment unauthorized before agent turn: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1730
+ return;
1731
+ }
1732
+ replyText = turn.text;
1467
1733
  }
1468
- const replyText = turn.text;
1469
1734
  if (!replyText) {
1470
1735
  logVoiceVerbose(`reply empty: guild ${entry.guildId} channel ${entry.channelId} user ${userId}`);
1471
1736
  return;
@@ -1639,6 +1904,9 @@ function destroyVoiceConnectionSafely(params) {
1639
1904
  logger.warn(`discord voice: destroy failed: ${params.reason}: ${message}`);
1640
1905
  }
1641
1906
  }
1907
+ function isRetryableVoiceJoinReadyError(error) {
1908
+ return formatErrorMessage(error).toLowerCase().includes("operation was aborted");
1909
+ }
1642
1910
  function normalizeVoiceChannelResidencies(entries) {
1643
1911
  const normalized = [];
1644
1912
  for (const entry of entries ?? []) {
@@ -1725,6 +1993,7 @@ var DiscordVoiceManager$1 = class {
1725
1993
  constructor(params) {
1726
1994
  this.params = params;
1727
1995
  this.sessions = /* @__PURE__ */ new Map();
1996
+ this.joinTasks = /* @__PURE__ */ new Map();
1728
1997
  this.autoJoinTask = null;
1729
1998
  this.fatalAutoJoinFailures = /* @__PURE__ */ new Map();
1730
1999
  this.followedUserChannels = /* @__PURE__ */ new Map();
@@ -1849,8 +2118,45 @@ var DiscordVoiceManager$1 = class {
1849
2118
  };
1850
2119
  }
1851
2120
  logVoiceVerbose(`join requested: guild ${guildId} channel ${channelId}`);
2121
+ while (true) {
2122
+ const activeJoinTask = this.joinTasks.get(guildId);
2123
+ if (!activeJoinTask) break;
2124
+ logVoiceVerbose(`join: waiting for active guild join guild ${guildId} channel ${channelId}`);
2125
+ await activeJoinTask.catch(() => void 0);
2126
+ if (this.destroyed) return {
2127
+ ok: false,
2128
+ message: "Discord voice manager is stopped.",
2129
+ guildId,
2130
+ channelId
2131
+ };
2132
+ }
2133
+ const joinTask = this.joinUnlocked({
2134
+ guildId,
2135
+ channelId
2136
+ }, options);
2137
+ this.joinTasks.set(guildId, joinTask);
2138
+ try {
2139
+ return await joinTask;
2140
+ } finally {
2141
+ if (this.joinTasks.get(guildId) === joinTask) this.joinTasks.delete(guildId);
2142
+ }
2143
+ }
2144
+ async joinUnlocked(params, options) {
2145
+ const { guildId, channelId } = params;
2146
+ const voiceConfig = this.params.discordConfig.voice;
2147
+ const voiceMode = resolveDiscordVoiceMode(voiceConfig);
1852
2148
  const existing = this.sessions.get(guildId);
1853
2149
  if (existing && existing.channelId === channelId) {
2150
+ if (options?.meetingNotes) existing.meetingNotes = options.meetingNotes;
2151
+ if (!options?.meetingNotes && isDiscordRealtimeVoiceMode(voiceMode) && !existing.realtime) {
2152
+ const realtimeResult = await this.attachRealtimeSession(existing, voiceMode, { requireLiveEntry: true });
2153
+ if (!realtimeResult.ok) return {
2154
+ ok: false,
2155
+ message: realtimeResult.message,
2156
+ guildId,
2157
+ channelId
2158
+ };
2159
+ }
1854
2160
  logVoiceVerbose(`join: already connected to guild ${guildId} channel ${channelId}`);
1855
2161
  return {
1856
2162
  ok: true,
@@ -1878,8 +2184,6 @@ var DiscordVoiceManager$1 = class {
1878
2184
  ok: false,
1879
2185
  message: "Discord voice plugin is not available."
1880
2186
  };
1881
- const voiceConfig = this.params.discordConfig.voice;
1882
- const voiceMode = resolveDiscordVoiceMode(voiceConfig);
1883
2187
  const adapterCreator = voicePlugin.getGatewayAdapterCreator(guildId);
1884
2188
  const daveEncryption = voiceConfig?.daveEncryption;
1885
2189
  const decryptionFailureTolerance = voiceConfig?.decryptionFailureTolerance;
@@ -1898,28 +2202,56 @@ var DiscordVoiceManager$1 = class {
1898
2202
  voiceSdk,
1899
2203
  reason: `stale connection before join guild ${guildId}`
1900
2204
  });
1901
- const connection = voiceSdk.joinVoiceChannel({
1902
- channelId,
1903
- guildId,
1904
- adapterCreator,
1905
- selfDeaf: false,
1906
- selfMute: false,
1907
- daveEncryption,
1908
- decryptionFailureTolerance
1909
- });
1910
- try {
1911
- await voiceSdk.entersState(connection, voiceSdk.VoiceConnectionStatus.Ready, connectReadyTimeoutMs);
1912
- logVoiceVerbose(`join: connected to guild ${guildId} channel ${channelId}`);
1913
- } catch (err) {
1914
- logger.warn(`discord voice: join failed before ready: guild ${guildId} channel ${channelId} timeout=${connectReadyTimeoutMs}ms error=${formatErrorMessage(err)}`);
2205
+ let connection;
2206
+ const connectReadyDeadlineMs = Date.now() + connectReadyTimeoutMs;
2207
+ for (let attempt = 1; attempt <= 2; attempt += 1) {
2208
+ const joinedConnection = voiceSdk.joinVoiceChannel({
2209
+ channelId,
2210
+ guildId,
2211
+ adapterCreator,
2212
+ selfDeaf: false,
2213
+ selfMute: false,
2214
+ daveEncryption,
2215
+ decryptionFailureTolerance
2216
+ });
2217
+ const remainingConnectReadyTimeoutMs = Math.max(1, connectReadyDeadlineMs - Date.now());
2218
+ try {
2219
+ await voiceSdk.entersState(joinedConnection, voiceSdk.VoiceConnectionStatus.Ready, remainingConnectReadyTimeoutMs);
2220
+ connection = joinedConnection;
2221
+ logVoiceVerbose(`join: connected to guild ${guildId} channel ${channelId}`);
2222
+ break;
2223
+ } catch (err) {
2224
+ destroyVoiceConnectionSafely({
2225
+ connection: joinedConnection,
2226
+ voiceSdk,
2227
+ reason: `failed join cleanup guild ${guildId} channel ${channelId}`
2228
+ });
2229
+ if (attempt === 1 && isRetryableVoiceJoinReadyError(err) && !this.destroyed && connectReadyDeadlineMs > Date.now()) {
2230
+ logVoiceVerbose(`join: retrying aborted ready wait guild ${guildId} channel ${channelId}`);
2231
+ continue;
2232
+ }
2233
+ logger.warn(`discord voice: join failed before ready: guild ${guildId} channel ${channelId} timeout=${connectReadyTimeoutMs}ms error=${formatErrorMessage(err)}`);
2234
+ return {
2235
+ ok: false,
2236
+ message: `Failed to join voice channel: ${formatErrorMessage(err)}`
2237
+ };
2238
+ }
2239
+ }
2240
+ if (!connection) return {
2241
+ ok: false,
2242
+ message: "Failed to join voice channel."
2243
+ };
2244
+ if (this.destroyed) {
1915
2245
  destroyVoiceConnectionSafely({
1916
2246
  connection,
1917
2247
  voiceSdk,
1918
- reason: `failed join cleanup guild ${guildId} channel ${channelId}`
2248
+ reason: `manager stopped during join guild ${guildId} channel ${channelId}`
1919
2249
  });
1920
2250
  return {
1921
2251
  ok: false,
1922
- message: `Failed to join voice channel: ${formatErrorMessage(err)}`
2252
+ message: "Discord voice manager is stopped.",
2253
+ guildId,
2254
+ channelId
1923
2255
  };
1924
2256
  }
1925
2257
  const sessionChannelId = channelInfo?.id ?? channelId;
@@ -1968,6 +2300,8 @@ var DiscordVoiceManager$1 = class {
1968
2300
  if (disconnectedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Disconnected, disconnectedHandler);
1969
2301
  if (destroyedHandler) connection.off(voiceSdk.VoiceConnectionStatus.Destroyed, destroyedHandler);
1970
2302
  if (playerErrorHandler) player.off("error", playerErrorHandler);
2303
+ entry.pendingRealtime?.close();
2304
+ entry.pendingRealtime = void 0;
1971
2305
  entry.realtime?.close();
1972
2306
  entry.realtime = void 0;
1973
2307
  player.stop();
@@ -1990,7 +2324,9 @@ var DiscordVoiceManager$1 = class {
1990
2324
  playbackQueue: Promise.resolve(),
1991
2325
  processingQueue: Promise.resolve(),
1992
2326
  capture: createVoiceCaptureState(),
2327
+ meetingNotes: options?.meetingNotes,
1993
2328
  receiveRecovery: createVoiceReceiveRecoveryState(),
2329
+ isStopped: () => stopped,
1994
2330
  stop: () => {
1995
2331
  stopEntry(entry, {
1996
2332
  destroyConnection: true,
@@ -1998,29 +2334,9 @@ var DiscordVoiceManager$1 = class {
1998
2334
  });
1999
2335
  }
2000
2336
  };
2001
- if (voiceMode !== "stt-tts") {
2002
- entry.realtime = new DiscordRealtimeVoiceSession({
2003
- bootstrapContextInstructions: await resolveDiscordVoiceRealtimeBootstrapContext({
2004
- entry,
2005
- cfg: this.params.cfg,
2006
- discordConfig: this.params.discordConfig
2007
- }),
2008
- cfg: this.params.cfg,
2009
- discordConfig: this.params.discordConfig,
2010
- entry,
2011
- mode: voiceMode,
2012
- runAgentTurn: ({ context, message, toolsAllow, userId }) => this.runDiscordRealtimeAgentTurn({
2013
- context,
2014
- entry,
2015
- message,
2016
- toolsAllow,
2017
- userId
2018
- })
2019
- });
2020
- try {
2021
- await entry.realtime.connect();
2022
- } catch (err) {
2023
- entry.realtime.close();
2337
+ if (!options?.meetingNotes && isDiscordRealtimeVoiceMode(voiceMode)) {
2338
+ const realtimeResult = await this.attachRealtimeSession(entry, voiceMode);
2339
+ if (!realtimeResult.ok) {
2024
2340
  destroyVoiceConnectionSafely({
2025
2341
  connection,
2026
2342
  voiceSdk,
@@ -2028,12 +2344,24 @@ var DiscordVoiceManager$1 = class {
2028
2344
  });
2029
2345
  return {
2030
2346
  ok: false,
2031
- message: `Failed to start Discord realtime voice: ${formatErrorMessage(err)}`,
2347
+ message: realtimeResult.message,
2032
2348
  guildId,
2033
2349
  channelId
2034
2350
  };
2035
2351
  }
2036
2352
  }
2353
+ if (this.destroyed) {
2354
+ stopEntry(entry, {
2355
+ destroyConnection: true,
2356
+ reason: `manager stopped during setup guild ${guildId} channel ${channelId}`
2357
+ });
2358
+ return {
2359
+ ok: false,
2360
+ message: "Discord voice manager is stopped.",
2361
+ guildId,
2362
+ channelId
2363
+ };
2364
+ }
2037
2365
  speakingHandler = (userId) => {
2038
2366
  this.handleSpeakingStart(entry, userId).catch((err) => {
2039
2367
  logger.warn(`discord voice: capture failed: ${formatErrorMessage(err)}`);
@@ -2085,6 +2413,52 @@ var DiscordVoiceManager$1 = class {
2085
2413
  channelId
2086
2414
  };
2087
2415
  }
2416
+ async attachRealtimeSession(entry, voiceMode, options) {
2417
+ const bootstrapContextInstructions = await resolveDiscordVoiceRealtimeBootstrapContext({
2418
+ entry,
2419
+ cfg: this.params.cfg,
2420
+ discordConfig: this.params.discordConfig
2421
+ });
2422
+ if (entry.isStopped() || options?.requireLiveEntry === true && this.sessions.get(entry.guildId) !== entry) return {
2423
+ ok: false,
2424
+ message: "Discord realtime voice session stopped before startup completed."
2425
+ };
2426
+ const realtime = new DiscordRealtimeVoiceSession({
2427
+ bootstrapContextInstructions,
2428
+ cfg: this.params.cfg,
2429
+ discordConfig: this.params.discordConfig,
2430
+ entry,
2431
+ mode: voiceMode,
2432
+ runAgentTurn: ({ context, message, toolsAllow, userId }) => this.runDiscordRealtimeAgentTurn({
2433
+ context,
2434
+ entry,
2435
+ message,
2436
+ toolsAllow,
2437
+ userId
2438
+ })
2439
+ });
2440
+ entry.pendingRealtime = realtime;
2441
+ try {
2442
+ await realtime.connect();
2443
+ if (entry.pendingRealtime !== realtime || entry.isStopped() || options?.requireLiveEntry === true && this.sessions.get(entry.guildId) !== entry) {
2444
+ realtime.close();
2445
+ return {
2446
+ ok: false,
2447
+ message: "Discord realtime voice session stopped before startup completed."
2448
+ };
2449
+ }
2450
+ entry.pendingRealtime = void 0;
2451
+ entry.realtime = realtime;
2452
+ return { ok: true };
2453
+ } catch (err) {
2454
+ if (entry.pendingRealtime === realtime) entry.pendingRealtime = void 0;
2455
+ realtime.close();
2456
+ return {
2457
+ ok: false,
2458
+ message: `Failed to start Discord realtime voice: ${formatErrorMessage(err)}`
2459
+ };
2460
+ }
2461
+ }
2088
2462
  async leave(params, options) {
2089
2463
  const guildId = params.guildId.trim();
2090
2464
  logVoiceVerbose(`leave requested: guild ${guildId} channel ${params.channelId ?? "current"}`);
@@ -2097,6 +2471,23 @@ var DiscordVoiceManager$1 = class {
2097
2471
  ok: false,
2098
2472
  message: "Not connected to that voice channel."
2099
2473
  };
2474
+ if (options?.meetingNotesSessionId) {
2475
+ if (!entry.meetingNotes || entry.meetingNotes.sessionId !== options.meetingNotesSessionId) return {
2476
+ ok: false,
2477
+ message: "Meeting notes session is not active in this voice channel.",
2478
+ guildId,
2479
+ channelId: entry.channelId
2480
+ };
2481
+ if (entry.realtime || entry.pendingRealtime) {
2482
+ entry.meetingNotes = void 0;
2483
+ return {
2484
+ ok: true,
2485
+ message: `Stopped meeting notes for ${formatMention({ channelId: entry.channelId })}.`,
2486
+ guildId,
2487
+ channelId: entry.channelId
2488
+ };
2489
+ }
2490
+ }
2100
2491
  entry.stop();
2101
2492
  this.sessions.delete(guildId);
2102
2493
  if (!options?.preserveFollowState) {
@@ -2595,6 +2986,7 @@ var DiscordVoiceManager$1 = class {
2595
2986
  ownerAllowFrom: this.ownerAllowFrom,
2596
2987
  runtime: this.params.runtime,
2597
2988
  speakerContext: this.speakerContext,
2989
+ meetingNotes: params.entry.meetingNotes,
2598
2990
  fetchGuildName: async (guildId) => {
2599
2991
  const guild = await this.params.client.fetchGuild(guildId).catch(() => null);
2600
2992
  return guild && typeof guild.name === "string" && guild.name.trim() ? guild.name : void 0;