npm - discoclaw - Versions diffs - 1.3.0 → 2.0.0 - Mend

discoclaw 1.3.0 → 2.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

package/.env.example +4 -6
package/.env.example.full +13 -32
package/README.md +1 -1
package/dist/cli/dashboard.test.js +0 -4
package/dist/cli/init-wizard.js +4 -8
package/dist/cli/init-wizard.test.js +4 -10
package/dist/config.js +2 -42
package/dist/config.test.js +8 -72
package/dist/dashboard/server.js +1 -5
package/dist/dashboard/server.test.js +3 -6
package/dist/discord/actions.js +112 -6
package/dist/discord/actions.test.js +117 -1
package/dist/discord/help-command.js +1 -1
package/dist/discord/message-coordinator.js +3 -8
package/dist/discord/models-command.js +1 -1
package/dist/discord/reaction-handler.js +2 -2
package/dist/discord/reaction-handler.test.js +55 -0
package/dist/discord/verify-push.js +31 -36
package/dist/discord/verify-push.test.js +34 -6
package/dist/discord/voice-command.js +1 -31
package/dist/discord/voice-command.test.js +21 -259
package/dist/discord/voice-status-command.js +3 -22
package/dist/discord/voice-status-command.test.js +16 -124
package/dist/discord-followup.test.js +133 -0
package/dist/health/config-doctor.js +5 -27
package/dist/health/config-doctor.test.js +1 -4
package/dist/index.js +1 -28
package/dist/runtime-overrides.js +2 -3
package/dist/runtime-overrides.test.js +27 -193
package/dist/tasks/store.js +10 -6
package/dist/tasks/store.test.js +44 -0
package/dist/tasks/task-action-executor.test.js +162 -50
package/dist/tasks/task-action-mutations.js +22 -2
package/dist/tasks/task-action-read-ops.js +7 -1
package/dist/tasks/task-action-runner-types.js +19 -1
package/dist/voice/audio-pipeline.js +145 -298
package/docs/configuration.md +4 -9
package/docs/official-docs.md +6 -9
package/docs/runtime-switching.md +1 -1
package/package.json +1 -1
package/dist/voice/audio-pipeline.test.js +0 -1100
package/dist/voice/stt-deepgram.js +0 -154
package/dist/voice/stt-deepgram.test.js +0 -275
package/dist/voice/stt-factory.js +0 -42
package/dist/voice/stt-factory.test.js +0 -45
package/dist/voice/stt-openai.js +0 -156
package/dist/voice/stt-openai.test.js +0 -281
package/dist/voice/tts-cartesia.js +0 -169
package/dist/voice/tts-cartesia.test.js +0 -228
package/dist/voice/tts-deepgram.js +0 -84
package/dist/voice/tts-deepgram.test.js +0 -220
package/dist/voice/tts-factory.js +0 -52
package/dist/voice/tts-factory.test.js +0 -53
package/dist/voice/tts-openai.js +0 -70
package/dist/voice/tts-openai.test.js +0 -138
package/dist/voice/types.test.js +0 -90

package/dist/voice/audio-pipeline.js CHANGED Viewed

@@ -8,9 +8,6 @@
  */
 import { VoiceConnectionStatus } from '@discordjs/voice';
 import { AudioReceiver } from './audio-receiver.js';
-import { createSttProvider } from './stt-factory.js';
-import { createTtsProvider } from './tts-factory.js';
-import { VoiceResponder } from './voice-responder.js';
 import { ConversationBuffer } from './conversation-buffer.js';
 import { GeminiLiveProvider } from './providers/gemini-live-provider.js';
 import { GeminiLiveResponder } from './providers/gemini-live-responder.js';
@@ -22,54 +19,44 @@ import { executeToolCall } from '../runtime/openai-tool-exec.js';
 // ---------------------------------------------------------------------------
 export class AudioPipelineManager {
     log;
-    voiceConfig;
     allowedUserIds;
     createDecoder;
     onTranscription;
-    createStt;
     invokeAi;
     runtime;
     runtimeModel;
     runtimeCwd;
     runtimeTimeoutMs;
-    createTts;
     transcriptMirror;
     botDisplayName;
     backfill;
     buildGeminiSystemInstruction;
-    voiceProvider;
     geminiApiKey;
     enabledTools;
     silentTools;
     sessionRotationMs;
-    onFallbackTriggered;
     pipelines = new Map();
     /** Re-entrancy guard: VoiceConnection.subscribe() can synchronously fire stateChange→Ready. */
     starting = new Set();
     constructor(opts) {
         this.log = opts.log;
-        this.voiceConfig = opts.voiceConfig;
         this.allowedUserIds = opts.allowedUserIds;
         this.createDecoder = opts.createDecoder;
         this.onTranscription = opts.onTranscription;
-        this.createStt = opts.createStt ?? createSttProvider;
         this.invokeAi = opts.invokeAi;
         this.runtime = opts.runtime;
         this.runtimeModel = opts.runtimeModel;
         this.runtimeCwd = opts.runtimeCwd;
         this.runtimeTimeoutMs = opts.runtimeTimeoutMs;
-        this.createTts = opts.createTts ?? createTtsProvider;
         this.transcriptMirror = opts.transcriptMirror;
         this.botDisplayName = opts.botDisplayName ?? 'Bot';
         this.backfill = opts.backfill;
         this.buildGeminiSystemInstruction = opts.buildGeminiSystemInstruction;
-        this.voiceProvider = opts.voiceProvider ?? 'pipeline';
         this.geminiApiKey = opts.geminiApiKey;
         this.enabledTools = opts.enabledTools ?? [];
         this.silentTools = new Set(opts.silentTools ?? []);
         this.sessionRotationMs = opts.sessionRotationMs;
-        this.onFallbackTriggered = opts.onFallbackTriggered;
-        this.log.info({ voiceProvider: this.voiceProvider }, 'audio pipeline manager initialized');
+        this.log.info({ voiceProvider: 'gemini-live' }, 'audio pipeline manager initialized');
     }
     /**
      * Attach to a VoiceConnection and auto-manage the audio pipeline
@@ -87,8 +74,8 @@ export class AudioPipelineManager {
             }
         });
     }
-    /** Start the audio receive pipeline for a guild. Pass `forceMode` to override the configured provider (used during fallback). */
-    async startPipeline(guildId, connection, forceMode) {
+    /** Start the Gemini Live voice pipeline for a guild. */
+    async startPipeline(guildId, connection) {
         // Re-entrancy guard: VoiceConnection.subscribe() (called when wiring the
         // AudioPlayer) synchronously fires a stateChange→Ready event, which would
         // re-invoke startPipeline and recurse infinitely.
@@ -100,269 +87,171 @@ export class AudioPipelineManager {
             this.log.info({ guildId }, 'stopping existing pipeline before restart');
             await this.stopPipeline(guildId);
         }
-        const effectiveMode = forceMode ?? this.voiceProvider;
         try {
-            // ----- gemini-live mode: skip STT/TTS, use GeminiLiveProvider directly -----
-            if (effectiveMode === 'gemini-live') {
-                const apiKey = this.geminiApiKey;
-                if (!apiKey)
-                    throw new Error('geminiApiKey is required for gemini-live voice provider');
-                const buffer = new ConversationBuffer();
-                if (this.backfill) {
-                    try {
-                        const turns = await this.backfill();
-                        buffer.backfill(turns);
-                        this.log.info({ guildId, turns: turns.length }, 'gemini-live conversation buffer backfilled');
-                    }
-                    catch (err) {
-                        this.log.warn({ guildId, err }, 'gemini-live conversation backfill failed — proceeding with empty history');
-                    }
-                }
-                const geminiLiveModel = normalizeGeminiLiveModel(this.runtimeModel) ?? DEFAULT_GEMINI_LIVE_MODEL;
-                const supportsAsyncFunctionCalling = supportsGeminiLiveAsyncFunctionCalling(geminiLiveModel);
-                const tools = buildGeminiToolDeclarations(this.enabledTools, { nonBlocking: supportsAsyncFunctionCalling });
-                const systemInstruction = await this.buildGeminiSystemInstruction?.();
-                const initialHistory = toGeminiLiveHistoryTurns(buffer.toTurns());
-                const provider = new GeminiLiveProvider({
-                    apiKey,
-                    log: this.log,
-                    model: geminiLiveModel,
-                    systemInstruction,
-                    responseModalities: ['AUDIO'],
-                    tools,
-                    initialHistoryInClientContent: initialHistory.length > 0,
-                    sessionRotationMs: this.sessionRotationMs,
-                });
-                await provider.connect();
-                if (initialHistory.length > 0) {
-                    provider.sendInitialHistory(initialHistory);
-                    this.log.info({ guildId, turns: initialHistory.length }, 'gemini-live conversation history seeded');
+            const apiKey = this.geminiApiKey;
+            if (!apiKey)
+                throw new Error('geminiApiKey is required for gemini-live voice provider');
+            const buffer = new ConversationBuffer();
+            if (this.backfill) {
+                try {
+                    const turns = await this.backfill();
+                    buffer.backfill(turns);
+                    this.log.info({ guildId, turns: turns.length }, 'gemini-live conversation buffer backfilled');
                 }
-                if (!supportsAsyncFunctionCalling && this.silentTools.size > 0) {
-                    this.log.info({ guildId, model: geminiLiveModel, count: this.silentTools.size }, 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
+                catch (err) {
+                    this.log.warn({ guildId, err }, 'gemini-live conversation backfill failed — proceeding with empty history');
                 }
-                const mirror = this.transcriptMirror;
-                const botName = this.botDisplayName;
-                let latestInputTranscript;
-                const responder = new GeminiLiveResponder({
-                    log: this.log,
-                    connection,
-                    provider,
-                    onBotResponse: mirror
-                        ? (text) => {
-                            if (latestInputTranscript && text.trim()) {
-                                buffer.push(latestInputTranscript, text);
-                                latestInputTranscript = undefined;
-                            }
-                            mirror.postBotResponse(botName, text).catch((err) => {
-                                this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
-                            });
+            }
+            const geminiLiveModel = normalizeGeminiLiveModel(this.runtimeModel) ?? DEFAULT_GEMINI_LIVE_MODEL;
+            const supportsAsyncFunctionCalling = supportsGeminiLiveAsyncFunctionCalling(geminiLiveModel);
+            const tools = buildGeminiToolDeclarations(this.enabledTools, { nonBlocking: supportsAsyncFunctionCalling });
+            const systemInstruction = await this.buildGeminiSystemInstruction?.();
+            const initialHistory = toGeminiLiveHistoryTurns(buffer.toTurns());
+            const provider = new GeminiLiveProvider({
+                apiKey,
+                log: this.log,
+                model: geminiLiveModel,
+                systemInstruction,
+                responseModalities: ['AUDIO'],
+                tools,
+                initialHistoryInClientContent: initialHistory.length > 0,
+                sessionRotationMs: this.sessionRotationMs,
+            });
+            await provider.connect();
+            if (initialHistory.length > 0) {
+                provider.sendInitialHistory(initialHistory);
+                this.log.info({ guildId, turns: initialHistory.length }, 'gemini-live conversation history seeded');
+            }
+            if (!supportsAsyncFunctionCalling && this.silentTools.size > 0) {
+                this.log.info({ guildId, model: geminiLiveModel, count: this.silentTools.size }, 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
+            }
+            const mirror = this.transcriptMirror;
+            const botName = this.botDisplayName;
+            let latestInputTranscript;
+            const responder = new GeminiLiveResponder({
+                log: this.log,
+                connection,
+                provider,
+                onBotResponse: mirror
+                    ? (text) => {
+                        if (latestInputTranscript && text.trim()) {
+                            buffer.push(latestInputTranscript, text);
+                            latestInputTranscript = undefined;
                         }
-                        : (text) => {
-                            if (latestInputTranscript && text.trim()) {
-                                buffer.push(latestInputTranscript, text);
-                                latestInputTranscript = undefined;
-                            }
-                        },
-                    onInputTranscript: mirror
-                        ? (text) => {
-                            if (text.trim())
-                                latestInputTranscript = text.trim();
-                            mirror.postUserTranscription('User', text).catch((err) => {
-                                this.log.warn({ guildId, err }, 'transcript-mirror: failed to post user transcription');
-                            });
+                        mirror.postBotResponse(botName, text).catch((err) => {
+                            this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
+                        });
+                    }
+                    : (text) => {
+                        if (latestInputTranscript && text.trim()) {
+                            buffer.push(latestInputTranscript, text);
+                            latestInputTranscript = undefined;
                         }
-                        : (text) => {
-                            if (text.trim())
-                                latestInputTranscript = text.trim();
-                        },
-                    onSessionTerminated: () => {
-                        this.log.error({ guildId }, 'gemini-live session terminally failed — no fallback (fallback disabled)');
                     },
-                    onFallbackRecommended: (reason) => {
-                        this.log.warn({ guildId, reason }, 'gemini-live: fallback recommended but fallback is disabled');
-                    },
-                    onTokenWarning: (estimatedTokens, threshold) => {
-                        this.log.warn({ guildId, estimatedTokens, threshold }, 'gemini-live: token usage approaching context window limit');
+                onInputTranscript: mirror
+                    ? (text) => {
+                        if (text.trim())
+                            latestInputTranscript = text.trim();
+                        mirror.postUserTranscription('User', text).catch((err) => {
+                            this.log.warn({ guildId, err }, 'transcript-mirror: failed to post user transcription');
+                        });
+                    }
+                    : (text) => {
+                        if (text.trim())
+                            latestInputTranscript = text.trim();
                     },
-                    onToolCall: tools
-                        ? (calls) => {
-                            this.log.info({ guildId, count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live: tool call received — dispatching');
-                            const allowedRoots = this.runtimeCwd ? [this.runtimeCwd] : [];
-                            const allowedToolNames = new Set(buildToolSchemas(this.enabledTools).map((t) => t.function.name));
-                            const logFn = (msg) => this.log.info({ guildId }, msg);
-                            const execOpts = { enableHybridPipeline: false, allowedToolNames };
-                            // Gemini 3.1 Live only supports synchronous function calling.
-                            // Gemini 2.5 Live can opt into NON_BLOCKING declarations and scheduled responses.
-                            void (async () => {
-                                const results = await Promise.all(calls.map(async (call) => {
-                                    const scheduling = supportsAsyncFunctionCalling
-                                        ? (this.isSilentTool(call.name) ? 'SILENT' : 'INTERRUPT')
-                                        : undefined;
-                                    try {
-                                        const res = await executeToolCall(call.name, call.args, allowedRoots, logFn, execOpts);
-                                        return { id: call.id, name: call.name, output: res.result, scheduling };
-                                    }
-                                    catch (err) {
-                                        const msg = err instanceof Error ? err.message : String(err);
-                                        return { id: call.id, name: call.name, output: `Error: ${msg}`, scheduling };
-                                    }
-                                }));
-                                const silentCount = supportsAsyncFunctionCalling
-                                    ? results.filter((r) => r.scheduling === 'SILENT').length
-                                    : 0;
-                                if (silentCount > 0) {
-                                    this.log.info({ guildId, count: silentCount }, 'gemini-live: SILENT tool execution complete — results scheduled silently');
-                                }
+                onSessionTerminated: () => {
+                    this.log.error({ guildId }, 'gemini-live session terminally failed — no fallback');
+                },
+                onFallbackRecommended: (reason) => {
+                    this.log.warn({ guildId, reason }, 'gemini-live: fallback recommended but the legacy pipeline has been removed');
+                },
+                onTokenWarning: (estimatedTokens, threshold) => {
+                    this.log.warn({ guildId, estimatedTokens, threshold }, 'gemini-live: token usage approaching context window limit');
+                },
+                onToolCall: tools
+                    ? (calls) => {
+                        this.log.info({ guildId, count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live: tool call received — dispatching');
+                        const allowedRoots = this.runtimeCwd ? [this.runtimeCwd] : [];
+                        const allowedToolNames = new Set(buildToolSchemas(this.enabledTools).map((t) => t.function.name));
+                        const logFn = (msg) => this.log.info({ guildId }, msg);
+                        const execOpts = { enableHybridPipeline: false, allowedToolNames };
+                        void (async () => {
+                            const results = await Promise.all(calls.map(async (call) => {
+                                const scheduling = supportsAsyncFunctionCalling
+                                    ? (this.isSilentTool(call.name) ? 'SILENT' : 'INTERRUPT')
+                                    : undefined;
                                 try {
-                                    provider.sendToolResponse(results);
+                                    const res = await executeToolCall(call.name, call.args, allowedRoots, logFn, execOpts);
+                                    return { id: call.id, name: call.name, output: res.result, scheduling };
                                 }
                                 catch (err) {
-                                    this.log.warn({ guildId, err }, 'gemini-live: sendToolResponse failed (provider likely disconnected)');
+                                    const msg = err instanceof Error ? err.message : String(err);
+                                    return { id: call.id, name: call.name, output: `Error: ${msg}`, scheduling };
                                 }
-                            })();
-                        }
-                        : undefined,
-                });
-                responder.start();
-                // SttProvider shim: bridges AudioReceiver frames to GeminiLiveProvider.sendAudio
-                const sttShim = {
-                    start: async () => { },
-                    stop: async () => { },
-                    onTranscription: () => { },
-                    feedAudio: (frame) => {
-                        try {
-                            provider.sendAudio(frame.buffer);
-                        }
-                        catch (err) {
-                            this.log.warn({ guildId, err }, 'gemini-live: sendAudio error (non-fatal)');
-                        }
-                    },
-                };
-                const receiver = new AudioReceiver({
-                    connection,
-                    allowedUserIds: this.allowedUserIds,
-                    sttProvider: sttShim,
-                    log: this.log,
-                    createDecoder: this.createDecoder,
-                    onUserSpeaking: () => { },
-                    onUserSilence: () => {
-                        try {
-                            provider.sendAudioStreamEnd();
-                        }
-                        catch (err) {
-                            this.log.warn({ guildId, err }, 'gemini-live: sendAudioStreamEnd error (non-fatal)');
-                        }
-                    },
-                });
-                receiver.start();
-                this.pipelines.set(guildId, {
-                    connection,
-                    sttProvider: sttShim,
-                    receiver,
-                    buffer,
-                    geminiProvider: provider,
-                    geminiResponder: responder,
-                    mode: 'gemini-live',
-                });
-                this.log.info({ guildId }, 'audio pipeline started (gemini-live)');
-                return;
-            }
-            // ----- default pipeline mode: STT/TTS/VoiceResponder -----
-            const sttProvider = this.createStt(this.voiceConfig, this.log);
-            const mirror = this.transcriptMirror;
-            // Create conversation buffer and backfill history if available
-            let buffer;
-            if (this.invokeAi) {
-                buffer = new ConversationBuffer();
-                if (this.backfill) {
-                    try {
-                        const turns = await this.backfill();
-                        buffer.backfill(turns);
-                        this.log.info({ guildId, turns: turns.length }, 'conversation buffer backfilled');
-                    }
-                    catch (err) {
-                        this.log.warn({ guildId, err }, 'conversation backfill failed — proceeding with empty buffer');
-                    }
-                }
-            }
-            // Create VoiceResponder for the full conversation loop if invokeAi is configured
-            let responder;
-            if (this.invokeAi) {
-                try {
-                    const tts = this.createTts(this.voiceConfig, this.log);
-                    const botName = this.botDisplayName;
-                    responder = new VoiceResponder({
-                        log: this.log,
-                        tts,
-                        connection,
-                        invokeAi: this.invokeAi,
-                        onBotResponse: mirror
-                            ? (text) => {
-                                mirror.postBotResponse(botName, text).catch((err) => {
-                                    this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
-                                });
+                            }));
+                            const silentCount = supportsAsyncFunctionCalling
+                                ? results.filter((r) => r.scheduling === 'SILENT').length
+                                : 0;
+                            if (silentCount > 0) {
+                                this.log.info({ guildId, count: silentCount }, 'gemini-live: SILENT tool execution complete — results scheduled silently');
                             }
-                            : undefined,
-                        buffer,
-                    });
-                    this.log.info({ guildId }, 'voice responder created');
-                }
-                catch (err) {
-                    this.log.warn({ guildId, err }, 'failed to create voice responder — continuing with STT-only mode');
-                }
-            }
-            // Wire transcription callback — fires the external callback, transcript mirror, and responder
-            const onTranscriptionCb = this.onTranscription;
-            if (onTranscriptionCb || responder || mirror) {
-                sttProvider.onTranscription((result) => {
-                    if (onTranscriptionCb) {
-                        onTranscriptionCb(guildId, result);
+                            try {
+                                provider.sendToolResponse(results);
+                            }
+                            catch (err) {
+                                this.log.warn({ guildId, err }, 'gemini-live: sendToolResponse failed (provider likely disconnected)');
+                            }
+                        })();
                     }
-                    // STT-confirmed barge-in: any transcription (interim or final) with
-                    // non-empty text stops ongoing playback. Echo produces empty
-                    // transcriptions; real speech produces non-empty ones.
-                    if (result.text.trim() && responder?.isPlaying) {
-                        this.log.info({ guildId }, 'barge-in detected');
-                        responder.stop();
+                    : undefined,
+            });
+            responder.start();
+            // SttProvider shim: bridges AudioReceiver frames to GeminiLiveProvider.sendAudio
+            const sttShim = {
+                start: async () => { },
+                stop: async () => { },
+                onTranscription: () => { },
+                feedAudio: (frame) => {
+                    try {
+                        provider.sendAudio(frame.buffer);
                     }
-                    if (result.isFinal && result.text.trim()) {
-                        if (mirror) {
-                            mirror.postUserTranscription('User', result.text).catch((err) => {
-                                this.log.warn({ guildId, err }, 'transcript-mirror: failed to post user transcription');
-                            });
-                        }
-                        if (responder) {
-                            responder.handleTranscription(result.text).catch((err) => {
-                                this.log.error({ guildId, err }, 'voice-responder: handleTranscription failed');
-                            });
-                        }
+                    catch (err) {
+                        this.log.warn({ guildId, err }, 'gemini-live: sendAudio error (non-fatal)');
                     }
-                });
-            }
-            await sttProvider.start();
+                },
+            };
             const receiver = new AudioReceiver({
                 connection,
                 allowedUserIds: this.allowedUserIds,
-                sttProvider,
+                sttProvider: sttShim,
                 log: this.log,
                 createDecoder: this.createDecoder,
-                onUserSpeaking: (_userId) => {
-                    // Barge-in is now gated on STT transcription (see onTranscription
-                    // callback above). This callback is kept for AudioReceiver
-                    // subscription management.
+                onUserSpeaking: () => { },
+                onUserSilence: () => {
+                    try {
+                        provider.sendAudioStreamEnd();
+                    }
+                    catch (err) {
+                        this.log.warn({ guildId, err }, 'gemini-live: sendAudioStreamEnd error (non-fatal)');
+                    }
                 },
             });
             receiver.start();
-            this.pipelines.set(guildId, { connection, sttProvider, receiver, responder, buffer, mode: 'pipeline' });
-            this.log.info({ guildId, mode: effectiveMode }, 'audio pipeline started');
+            this.pipelines.set(guildId, {
+                connection,
+                sttProvider: sttShim,
+                receiver,
+                buffer,
+                geminiProvider: provider,
+                geminiResponder: responder,
+                mode: 'gemini-live',
+            });
+            this.log.info({ guildId }, 'audio pipeline started (gemini-live)');
         }
         catch (err) {
             this.log.error({ guildId, err }, 'failed to start audio pipeline');
-            // Fallback disabled — gemini-live must succeed or the pipeline stays down
-            if (effectiveMode === 'gemini-live') {
-                this.log.error({ guildId }, 'gemini-live: connection failed — no fallback (fallback disabled)');
-            }
+            this.log.error({ guildId }, 'gemini-live: connection failed — no fallback available');
         }
         finally {
             this.starting.delete(guildId);
@@ -378,7 +267,6 @@ export class AudioPipelineManager {
         if (pipeline.geminiProvider) {
             await pipeline.geminiProvider.disconnect();
         }
-        pipeline.responder?.destroy();
         pipeline.receiver.stop();
         try {
             await pipeline.sttProvider.stop();
@@ -401,58 +289,17 @@ export class AudioPipelineManager {
     get activePipelineCount() {
         return this.pipelines.size;
     }
-    /** Configured voice provider mode ('pipeline' or 'gemini-live'). */
+    /** Configured voice provider mode. */
     get activeVoiceProvider() {
-        return this.voiceProvider;
+        return 'gemini-live';
     }
-    /** Active mode for a specific guild (may differ from configured mode during fallback). */
+    /** Active mode for a specific guild. */
     pipelineMode(guildId) {
         return this.pipelines.get(guildId)?.mode;
     }
-    /** Current Deepgram TTS voice model name. */
-    get ttsVoice() {
-        return this.voiceConfig.deepgramTtsVoice;
-    }
-    /**
-     * Fall back from gemini-live to the standard pipeline for a guild.
-     * Stops the current gemini-live session and starts a standard STT/AI/TTS pipeline.
-     * No-op if no pipeline exists or the guild is already in standard mode.
-     */
-    async fallbackToPipeline(guildId, connection) {
-        const pipeline = this.pipelines.get(guildId);
-        if (!pipeline || pipeline.mode !== 'gemini-live')
-            return;
-        this.log.warn({ guildId }, 'gemini-live: initiating fallback to standard pipeline');
-        await this.stopPipeline(guildId);
-        await this.startPipeline(guildId, connection, 'pipeline');
-        if (this.hasPipeline(guildId)) {
-            this.log.info({ guildId }, 'gemini-live: fallback to standard pipeline succeeded');
-            this.onFallbackTriggered?.(guildId, 'pipeline');
-        }
-        else {
-            this.log.error({ guildId }, 'gemini-live: fallback to standard pipeline also failed — guild has no active pipeline');
-        }
-    }
     isSilentTool(toolName) {
         return this.silentTools.has(toolName) || this.silentTools.has(OPENAI_TO_DISCO_NAME[toolName] ?? toolName);
     }
-    /**
-     * Update the Deepgram TTS voice and restart all active pipelines so the
-     * new voice takes effect immediately. No-op in gemini-live mode (TTS is
-     * handled server-side).
-     * @returns The number of pipelines that were restarted (0 in gemini-live mode).
-     */
-    async setTtsVoice(voice) {
-        if (this.voiceProvider === 'gemini-live') {
-            this.log.info({ voice }, 'TTS voice change ignored — gemini-live mode uses server-side TTS');
-            return 0;
-        }
-        this.voiceConfig = { ...this.voiceConfig, deepgramTtsVoice: voice };
-        this.log.info({ voice }, 'TTS voice updated — restarting active pipelines');
-        const entries = [...this.pipelines.entries()];
-        await Promise.all(entries.map(([guildId, pipeline]) => this.startPipeline(guildId, pipeline.connection)));
-        return entries.length;
-    }
 }
 function toGeminiLiveHistoryTurns(turns) {
     const history = [];

package/docs/configuration.md CHANGED Viewed

@@ -37,7 +37,7 @@ For npm-managed daemon installs, readiness is currently constrained by service e
 Model/runtime state is intentionally split across three storage modes:
 - `models.json` stores persisted model strings per role (`chat`, `fast`, `plan-run`, `voice`, forge roles, cron roles, etc.).
-- `runtime-overrides.json` stores persisted runtime-only overlays such as `fastRuntime` and `voiceRuntime` (plus non-model keys such as `ttsVoice`).
+- `runtime-overrides.json` stores persisted runtime-only overlays such as `fastRuntime` and `voiceRuntime`.
 - Live chat runtime swaps stay in memory only. `!models set chat <runtime>` changes the active chat runtime immediately, but there is no persisted `chatRuntime` overlay.
 On first run, `models.json` is scaffolded from the instance startup defaults. After that:
@@ -322,24 +322,19 @@ The same forum-boundary rule applies to tasks: `DISCOCLAW_TASKS_FORUM` is the di
 ## Voice
-See [docs/voice.md](voice.md) for the full setup guide and provider details.
+See [docs/voice.md](voice.md) for the full Gemini Live setup guide.
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `DISCOCLAW_VOICE_ENABLED` | `false` | Master switch for voice subsystem |
 | `DISCOCLAW_VOICE_AUTO_JOIN` | `false` | Auto-join voice channels when users enter |
 | `ANTHROPIC_API_KEY` | — | Anthropic API key (required for direct Messages API voice responses) |
+| `GEMINI_API_KEY` | — | Gemini API key required for Gemini Live voice |
 | `DISCOCLAW_VOICE_MODEL` | follows startup chat model | Model override for voice responses |
 | `DISCOCLAW_VOICE_SYSTEM_PROMPT` | — | System prompt override for voice (max 4000 chars) |
-| `DISCOCLAW_STT_PROVIDER` | `deepgram` | Speech-to-text provider: `deepgram`, `whisper`, `openai` |
-| `DISCOCLAW_TTS_PROVIDER` | `cartesia` | Text-to-speech provider: `cartesia`, `deepgram`, `kokoro`, `openai` |
+| `DISCOCLAW_GEMINI_SESSION_ROTATION_MS` | `780000` | Proactive Gemini Live session rotation interval in milliseconds |
 | `DISCOCLAW_VOICE_HOME_CHANNEL` | — | Voice channel name or ID for prompt context |
 | `DISCOCLAW_VOICE_LOG_CHANNEL` | `voice-log` | Text channel for transcript mirror |
-| `DEEPGRAM_API_KEY` | — | Deepgram API key (required for Deepgram STT/TTS) |
-| `DEEPGRAM_STT_MODEL` | `nova-3-general` | Deepgram STT model |
-| `DEEPGRAM_TTS_VOICE` | `aura-2-asteria-en` | Deepgram TTS voice |
-| `DEEPGRAM_TTS_SPEED` | `1.3` | Deepgram TTS playback speed multiplier (0.5–1.5) |
-| `CARTESIA_API_KEY` | — | Cartesia API key (required for Cartesia TTS) |
 ## Webhook

package/docs/official-docs.md CHANGED Viewed

@@ -6,8 +6,8 @@ Completeness pass for this index was cross-checked against:
 - `package.json`
 - `.context/runtime.md`
-- `src/voice/tts-factory.ts`
-- `src/voice/stt-factory.ts`
+- `src/voice/audio-pipeline.ts`
+- `src/voice/providers/gemini-live-provider.ts`
 - `src/cold-storage/embeddings.ts`
 - `src/cold-storage/openai-compat.ts`
 - `src/discord/actions-imagegen.ts`
@@ -24,8 +24,8 @@ Completeness pass for this index was cross-checked against:
 | Provider | What DiscoClaw uses | Official docs |
 |----------|----------------------|---------------|
 | Anthropic | Claude model families via `src/runtime/anthropic-rest.ts` and Claude Code CLI runtime | Models overview: <https://docs.anthropic.com/en/docs/about-claude/models/overview><br>Messages API: <https://platform.claude.com/docs/en/api/messages><br>Claude Code docs: <https://code.claude.com/docs/en/overview> |
-| OpenAI | OpenAI-compatible runtime, Codex runtime docs, OpenAI voice, embeddings, and image generation | Model IDs: <https://developers.openai.com/api/model-ids/><br>API reference overview: <https://platform.openai.com/docs/api-reference><br>Codex docs: <https://developers.openai.com/codex/><br>Codex app-server API: <https://developers.openai.com/codex/app-server> |
-| Google | Gemini API runtime and Gemini/Imagen image generation | Gemini models: <https://ai.google.dev/models/gemini><br>Gemini API docs: <https://ai.google.dev/gemini-api/docs> |
+| OpenAI | OpenAI-compatible runtime, Codex runtime docs, embeddings, and image generation | Model IDs: <https://developers.openai.com/api/model-ids/><br>API reference overview: <https://platform.openai.com/docs/api-reference><br>Codex docs: <https://developers.openai.com/codex/><br>Codex app-server API: <https://developers.openai.com/codex/app-server> |
+| Google | Gemini API runtime, Gemini Live voice, and Gemini/Imagen image generation | Gemini models: <https://ai.google.dev/models/gemini><br>Gemini API docs: <https://ai.google.dev/gemini-api/docs><br>Gemini Live API: <https://ai.google.dev/gemini-api/docs/live> |
 | OpenRouter | OpenRouter runtime through `src/runtime/openai-compat.ts` | Model list: <https://openrouter.ai/models><br>API docs: <https://openrouter.ai/docs/api/reference/overview> |
 ## Discord
@@ -53,11 +53,8 @@ Completeness pass for this index was cross-checked against:
 | Provider | Used in DiscoClaw | Official docs |
 |----------|-------------------|---------------|
-| Deepgram STT | `src/voice/stt-deepgram.ts` with Nova-3 streaming (`nova-3-general`) | STT API overview: <https://developers.deepgram.com/docs/speech-to-text><br>Streaming API: <https://developers.deepgram.com/reference/speech-to-text/listen-streaming><br>Nova-3 models: <https://developers.deepgram.com/docs/models-languages-overview> |
-| Deepgram TTS | `src/voice/tts-deepgram.ts` with Aura (`aura-2-asteria-en`) | TTS API overview: <https://developers.deepgram.com/docs/text-to-speech><br>Speak endpoint: <https://developers.deepgram.com/reference/text-to-speech/speak-streaming><br>Aura voices/models: <https://developers.deepgram.com/docs/tts-models> |
-| Cartesia TTS | `src/voice/tts-cartesia.ts` with Sonic-3 over WebSocket | API docs: <https://docs.cartesia.ai/api-reference><br>TTS WebSocket: <https://docs.cartesia.ai/api-reference/tts/websocket> |
-| OpenAI TTS | `src/voice/tts-openai.ts` (`/v1/audio/speech`, default `tts-1`) | Audio speech API reference: <https://platform.openai.com/docs/api-reference/audio/createSpeech> |
-| OpenAI STT | `src/voice/stt-openai.ts` (`/v1/audio/transcriptions`, `whisper-1`) | Audio transcription API reference: <https://platform.openai.com/docs/api-reference/audio/createTranscription> |
+| Gemini Live | `src/voice/audio-pipeline.ts` and the Gemini Live provider handle speech recognition, reasoning, and speech synthesis in one session | Live API overview: <https://ai.google.dev/gemini-api/docs/live><br>Realtime guide: <https://ai.google.dev/gemini-api/docs/live-guide> |
+| Anthropic Messages API (optional voice runtime) | `!models set voice claude-api` can switch voice response generation to direct Anthropic API calls while Discord audio transport stays on Gemini Live | API overview: <https://docs.anthropic.com/en/api/messages> |
 ## Image Generation