discoclaw 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +7 -3
- package/.env.example.full +13 -32
- package/README.md +1 -1
- package/dist/cli/dashboard.js +7 -1
- package/dist/cli/dashboard.test.js +0 -4
- package/dist/cli/init-wizard.js +4 -8
- package/dist/cli/init-wizard.test.js +4 -10
- package/dist/config.js +5 -38
- package/dist/config.test.js +8 -72
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +82 -19
- package/dist/dashboard/server.test.js +123 -10
- package/dist/discord/actions.js +112 -6
- package/dist/discord/actions.test.js +117 -1
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/help-command.js +1 -1
- package/dist/discord/message-coordinator.js +4 -36
- package/dist/discord/models-command.js +1 -1
- package/dist/discord/reaction-handler.js +83 -5
- package/dist/discord/reaction-handler.test.js +55 -0
- package/dist/discord/verify-push.js +31 -36
- package/dist/discord/verify-push.test.js +34 -6
- package/dist/discord/voice-command.js +1 -31
- package/dist/discord/voice-command.test.js +21 -259
- package/dist/discord/voice-status-command.js +3 -22
- package/dist/discord/voice-status-command.test.js +16 -124
- package/dist/discord-followup.test.js +133 -0
- package/dist/health/config-doctor.js +5 -27
- package/dist/health/config-doctor.test.js +1 -4
- package/dist/index.js +15 -28
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/runtime-overrides.js +2 -3
- package/dist/runtime-overrides.test.js +27 -193
- package/dist/tasks/store.js +10 -6
- package/dist/tasks/store.test.js +44 -0
- package/dist/tasks/task-action-executor.test.js +162 -50
- package/dist/tasks/task-action-mutations.js +22 -2
- package/dist/tasks/task-action-read-ops.js +7 -1
- package/dist/tasks/task-action-runner-types.js +19 -1
- package/dist/voice/audio-pipeline.js +183 -96
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/docs/configuration.md +4 -9
- package/docs/official-docs.md +6 -9
- package/docs/runtime-switching.md +1 -1
- package/package.json +1 -1
- package/dist/voice/audio-pipeline.test.js +0 -619
- package/dist/voice/stt-deepgram.js +0 -154
- package/dist/voice/stt-deepgram.test.js +0 -275
- package/dist/voice/stt-factory.js +0 -42
- package/dist/voice/stt-factory.test.js +0 -45
- package/dist/voice/stt-openai.js +0 -156
- package/dist/voice/stt-openai.test.js +0 -281
- package/dist/voice/tts-cartesia.js +0 -169
- package/dist/voice/tts-cartesia.test.js +0 -228
- package/dist/voice/tts-deepgram.js +0 -84
- package/dist/voice/tts-deepgram.test.js +0 -220
- package/dist/voice/tts-factory.js +0 -52
- package/dist/voice/tts-factory.test.js +0 -53
- package/dist/voice/tts-openai.js +0 -70
- package/dist/voice/tts-openai.test.js +0 -138
- package/dist/voice/types.test.js +0 -84
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GeminiLiveResponder — Discord audio output bridge for the Gemini Live voice path.
|
|
3
|
+
*
|
|
4
|
+
* Phase 1.2: receives audio/text events from GeminiLiveProvider, upsamples
|
|
5
|
+
* audio to Discord's 48 kHz stereo format, manages an AudioPlayer + PassThrough
|
|
6
|
+
* stream pipeline, and handles barge-in via Gemini's interrupted/turn_complete events.
|
|
7
|
+
*/
|
|
8
|
+
import { PassThrough } from 'node:stream';
|
|
9
|
+
import { createAudioPlayer, createAudioResource, AudioPlayerStatus, StreamType, } from '@discordjs/voice';
|
|
10
|
+
import { upsampleToDiscord } from '../voice-responder.js';
|
|
11
|
+
// Gemini Live returns 24 kHz mono PCM s16le
|
|
12
|
+
// Source sample rate (Hz) handed to upsampleToDiscord() for every Gemini audio chunk.
const GEMINI_OUTPUT_RATE = 24_000;
|
|
13
|
+
// Source channel count handed to upsampleToDiscord() (1 = mono).
const GEMINI_OUTPUT_CHANNELS = 1;
|
|
14
|
+
// Delay (ms) between a turn_complete event and delivering the accumulated
// transcript to onBotResponse. Text events that arrive while a flush is
// pending restart this timer, so late transcript fragments are still included.
const TRANSCRIPT_FLUSH_DELAY_MS = 50;
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Responder
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * Bridges GeminiLiveProvider events to a Discord voice connection.
 *
 * Audio chunks are upsampled and written into a PassThrough stream that is
 * played through an AudioPlayer; text chunks are accumulated into a transcript
 * that is delivered to `onBotResponse` shortly after the turn completes.
 * Interruptions, session rotation and reconnects discard the current turn.
 *
 * Constructor options (`opts`):
 * - `log`: logger exposing `info`, `warn` and `error`.
 * - `connection`: voice connection exposing `subscribe(player)`.
 * - `provider`: event source exposing `onEvent(listener)`.
 * - `onBotResponse(text)`: optional, called with the finished turn transcript.
 * - `onInputTranscript(text)`: optional, called for each input transcript event.
 * - `onToolCall(calls)`: optional, called with the function calls of a tool_call event.
 * - `onSessionTerminated()`: optional, called after reconnect retries are exhausted.
 * - `onFallbackRecommended(reason)`: optional.
 * - `onTokenWarning(estimatedTokens, threshold)`: optional.
 * - `createPlayer`: optional player factory; defaults to `createAudioPlayer()`.
 */
export class GeminiLiveResponder {
    log;
    connection;
    provider;
    onBotResponse;
    onInputTranscript;
    onToolCall;
    onSessionTerminated;
    onFallbackRecommended;
    onTokenWarning;
    playerFactory;
    player = null;
    /** Result of connection.subscribe(); kept so destroy() can release it. */
    subscription = null;
    stream = null;
    transcript = '';
    transcriptFlushTimer = null;
    turnCompletePending = false;
    started = false;

    constructor(opts) {
        this.log = opts.log;
        this.connection = opts.connection;
        this.provider = opts.provider;
        this.onBotResponse = opts.onBotResponse;
        this.onInputTranscript = opts.onInputTranscript;
        this.onToolCall = opts.onToolCall;
        this.onSessionTerminated = opts.onSessionTerminated;
        this.onFallbackRecommended = opts.onFallbackRecommended;
        this.onTokenWarning = opts.onTokenWarning;
        this.playerFactory = opts.createPlayer ?? (() => createAudioPlayer());
    }

    /** Create the player, subscribe to the connection, and listen for provider events. */
    start() {
        if (this.started) {
            return;
        }
        this.started = true;
        this.player = this.playerFactory();
        const subscription = this.connection.subscribe(this.player);
        this.subscription = subscription ?? null;
        this.log.info({ subscribed: !!subscription }, 'gemini-live-responder: player subscription result');
        this.player.on('stateChange', (oldState, newState) => {
            this.log.info({ from: oldState.status, to: newState.status }, 'gemini-live-responder: player state change');
        });
        this.player.on('error', (err) => {
            this.log.error({ err }, 'gemini-live-responder: audio player error');
        });
        this.provider.onEvent((event) => this.handleEvent(event));
    }

    /** Whether the bot is audibly speaking (Playing or Buffering). */
    get isPlaying() {
        if (!this.player) {
            return false;
        }
        const status = this.player.state.status;
        return status === AudioPlayerStatus.Playing || status === AudioPlayerStatus.Buffering;
    }

    /** Stop playback and tear down the stream. */
    stop() {
        this.resetPlayback();
    }

    /** Stop and release all resources, including the connection subscription. */
    destroy() {
        this.stop();
        this.started = false;
        const subscription = this.subscription;
        this.subscription = null;
        // Release the player from the connection; tolerate subscribe() results
        // that are not subscription objects.
        if (typeof subscription?.unsubscribe === 'function') {
            try {
                subscription.unsubscribe();
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: failed to unsubscribe player');
            }
        }
        this.player = null;
    }

    // -----------------------------------------------------------------------
    // Internal
    // -----------------------------------------------------------------------

    /** Dispatch a provider event to its handler; unknown event types are ignored. */
    handleEvent(event) {
        switch (event.type) {
            case 'audio':
                this.handleAudio(event.data);
                break;
            case 'text':
                this.transcript += event.text;
                // Text that trails turn_complete pushes the pending flush back
                // so it is still part of the delivered transcript.
                if (this.turnCompletePending) {
                    this.scheduleTranscriptFlush();
                }
                break;
            case 'input_transcript':
                this.handleInputTranscript(event.text);
                break;
            case 'interrupted':
                this.handleInterrupted();
                break;
            case 'turn_complete':
                this.handleTurnComplete();
                break;
            case 'tool_call':
                this.handleToolCall(event.functionCalls);
                break;
            case 'session_rotating':
                this.handleSessionRotating();
                break;
            case 'reconnecting':
                this.handleReconnecting(event.attempt, event.maxRetries, event.hasResumeHandle);
                break;
            case 'reconnected':
                this.log.info({ attempt: event.attempt }, 'gemini-live-responder: session reconnected');
                break;
            case 'reconnect_failed':
                this.handleReconnectFailed(event.attempts);
                break;
            case 'token_warning':
                this.handleTokenWarning(event.estimatedTokens, event.threshold);
                break;
            case 'fallback_recommended':
                this.handleFallbackRecommended(event.reason);
                break;
            default:
                // setup_complete, error — not handled here
                break;
        }
    }

    /**
     * Upsample one audio chunk and queue it for playback.
     * The stream and audio resource are created lazily on the first chunk of a turn.
     */
    handleAudio(data) {
        // The provider listener outlives destroy(), so audio can arrive while
        // there is no player; drop it instead of throwing on a null player.
        if (!this.player) {
            return;
        }
        if (!this.stream) {
            const stream = new PassThrough();
            try {
                const resource = createAudioResource(stream, {
                    inputType: StreamType.Raw,
                });
                this.player.play(resource);
            }
            catch (err) {
                // Never keep a stream that nothing is reading from.
                stream.destroy();
                throw err;
            }
            this.stream = stream;
            this.log.info({}, 'gemini-live-responder: streaming playback started');
        }
        const upsampled = upsampleToDiscord(data, GEMINI_OUTPUT_RATE, GEMINI_OUTPUT_CHANNELS);
        this.stream.write(upsampled);
    }

    /** Barge-in: drop the current turn's audio and transcript immediately. */
    handleInterrupted() {
        this.log.info({}, 'gemini-live-responder: interrupted — stopping playback');
        this.resetPlayback();
    }

    /** Forward an input transcript to the optional onInputTranscript callback. */
    handleInputTranscript(text) {
        this.invokeCallback('onInputTranscript', this.onInputTranscript, text);
    }

    /** Log the requested function calls and forward them to the optional onToolCall callback. */
    handleToolCall(calls) {
        this.log.info({ count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live-responder: tool call received');
        this.invokeCallback('onToolCall', this.onToolCall, calls);
    }

    /** End the turn's audio stream and schedule delivery of its transcript. */
    handleTurnComplete() {
        this.log.info({}, 'gemini-live-responder: turn complete');
        // End the stream gracefully so remaining buffered audio plays out
        if (this.stream) {
            this.stream.end();
            this.stream = null;
        }
        // Fire the transcript callback
        this.turnCompletePending = true;
        this.scheduleTranscriptFlush();
    }

    /** Planned session rotation: discard the in-flight turn. */
    handleSessionRotating() {
        this.log.info({}, 'gemini-live-responder: planned session rotation — pausing playback');
        this.resetPlayback();
    }

    /** Reconnect attempt in progress: discard the in-flight turn. */
    handleReconnecting(attempt, maxRetries, hasResumeHandle) {
        this.log.info({ attempt, maxRetries, hasResumeHandle }, 'gemini-live-responder: session reconnecting — pausing playback');
        this.resetPlayback();
    }

    /** All reconnect retries failed: stop playback and notify onSessionTerminated. */
    handleReconnectFailed(attempts) {
        this.log.error({ attempts }, 'gemini-live-responder: session terminally failed — all reconnect retries exhausted');
        this.stop();
        this.invokeCallback('onSessionTerminated', this.onSessionTerminated);
    }

    /** Log the token warning and forward it to the optional onTokenWarning callback. */
    handleTokenWarning(estimatedTokens, threshold) {
        this.log.warn({ estimatedTokens, threshold }, 'gemini-live-responder: token usage approaching context window limit');
        this.invokeCallback('onTokenWarning', this.onTokenWarning, estimatedTokens, threshold);
    }

    /** Log the fallback recommendation and forward it to the optional onFallbackRecommended callback. */
    handleFallbackRecommended(reason) {
        this.log.warn({ reason }, 'gemini-live-responder: fallback to pipeline voice mode recommended');
        this.invokeCallback('onFallbackRecommended', this.onFallbackRecommended, reason);
    }

    /**
     * Discard the current turn: destroy the stream, cancel any pending
     * transcript flush, stop the player and clear transcript state.
     */
    resetPlayback() {
        this.destroyStream();
        this.cancelTranscriptFlush();
        this.player?.stop();
        this.transcript = '';
        this.turnCompletePending = false;
    }

    /**
     * Invoke an optional user callback without letting its failure escape.
     * Synchronous throws and rejected thenables are both logged as warnings.
     *
     * @param {string} name - option name, used in the warning message
     * @param {Function|undefined} callback - the callback, if one was supplied
     * @param {...*} args - arguments passed through to the callback
     */
    invokeCallback(name, callback, ...args) {
        if (!callback) {
            return;
        }
        const report = (err) => {
            this.log.warn({ err }, `gemini-live-responder: ${name} callback error`);
        };
        try {
            const result = callback.call(this, ...args);
            // try/catch cannot see async failures; attach a rejection handler
            // so an async callback cannot cause an unhandled rejection.
            if (typeof result?.then === 'function') {
                result.then(undefined, report);
            }
        }
        catch (err) {
            report(err);
        }
    }

    /** Destroy the active stream (dropping unplayed audio), if there is one. */
    destroyStream() {
        if (this.stream) {
            this.stream.destroy();
            this.stream = null;
        }
    }

    /**
     * (Re)start the flush timer. When it fires after a completed turn, the
     * accumulated transcript is cleared and, if non-empty, passed to onBotResponse.
     */
    scheduleTranscriptFlush() {
        this.cancelTranscriptFlush();
        this.transcriptFlushTimer = setTimeout(() => {
            this.transcriptFlushTimer = null;
            if (!this.turnCompletePending) {
                return;
            }
            const text = this.transcript;
            this.transcript = '';
            this.turnCompletePending = false;
            if (!text) {
                return;
            }
            this.invokeCallback('onBotResponse', this.onBotResponse, text);
        }, TRANSCRIPT_FLUSH_DELAY_MS);
    }

    /** Cancel a pending transcript flush, if any. */
    cancelTranscriptFlush() {
        if (this.transcriptFlushTimer) {
            clearTimeout(this.transcriptFlushTimer);
            this.transcriptFlushTimer = null;
        }
    }
}
|