discoclaw 1.2.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.context/voice.md +30 -2
  2. package/.env.example +7 -3
  3. package/.env.example.full +13 -32
  4. package/README.md +1 -1
  5. package/dist/cli/dashboard.js +7 -1
  6. package/dist/cli/dashboard.test.js +0 -4
  7. package/dist/cli/init-wizard.js +4 -8
  8. package/dist/cli/init-wizard.test.js +4 -10
  9. package/dist/config.js +5 -38
  10. package/dist/config.test.js +8 -72
  11. package/dist/cron/executor.js +72 -1
  12. package/dist/dashboard/api/metrics.js +7 -0
  13. package/dist/dashboard/api/metrics.test.js +16 -0
  14. package/dist/dashboard/api/traces.js +14 -0
  15. package/dist/dashboard/api/traces.test.js +40 -0
  16. package/dist/dashboard/page.js +187 -8
  17. package/dist/dashboard/server.js +82 -19
  18. package/dist/dashboard/server.test.js +123 -10
  19. package/dist/discord/actions.js +112 -6
  20. package/dist/discord/actions.test.js +117 -1
  21. package/dist/discord/deferred-runner.js +306 -219
  22. package/dist/discord/help-command.js +1 -1
  23. package/dist/discord/message-coordinator.js +4 -36
  24. package/dist/discord/models-command.js +1 -1
  25. package/dist/discord/reaction-handler.js +83 -5
  26. package/dist/discord/reaction-handler.test.js +55 -0
  27. package/dist/discord/verify-push.js +31 -36
  28. package/dist/discord/verify-push.test.js +34 -6
  29. package/dist/discord/voice-command.js +1 -31
  30. package/dist/discord/voice-command.test.js +21 -259
  31. package/dist/discord/voice-status-command.js +3 -22
  32. package/dist/discord/voice-status-command.test.js +16 -124
  33. package/dist/discord-followup.test.js +133 -0
  34. package/dist/health/config-doctor.js +5 -27
  35. package/dist/health/config-doctor.test.js +1 -4
  36. package/dist/index.js +15 -28
  37. package/dist/observability/trace-store.js +56 -0
  38. package/dist/observability/trace-utils.js +31 -0
  39. package/dist/runtime/codex-cli.js +3 -2
  40. package/dist/runtime/codex-cli.test.js +33 -0
  41. package/dist/runtime/model-tiers.js +1 -1
  42. package/dist/runtime/model-tiers.test.js +9 -0
  43. package/dist/runtime/openai-tool-schemas.js +17 -0
  44. package/dist/runtime-overrides.js +2 -3
  45. package/dist/runtime-overrides.test.js +27 -193
  46. package/dist/tasks/store.js +10 -6
  47. package/dist/tasks/store.test.js +44 -0
  48. package/dist/tasks/task-action-executor.test.js +162 -50
  49. package/dist/tasks/task-action-mutations.js +22 -2
  50. package/dist/tasks/task-action-read-ops.js +7 -1
  51. package/dist/tasks/task-action-runner-types.js +19 -1
  52. package/dist/voice/audio-pipeline.js +183 -96
  53. package/dist/voice/audio-receiver.js +8 -0
  54. package/dist/voice/audio-receiver.test.js +16 -0
  55. package/dist/voice/conversation-buffer.js +16 -6
  56. package/dist/voice/providers/gemini-live-provider.js +481 -0
  57. package/dist/voice/providers/gemini-live-provider.test.js +834 -0
  58. package/dist/voice/providers/gemini-live-responder.js +267 -0
  59. package/dist/voice/providers/gemini-live-responder.test.js +615 -0
  60. package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
  61. package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
  62. package/dist/voice/providers/gemini-live-types.js +32 -0
  63. package/dist/voice/providers/gemini-tool-mapper.js +91 -0
  64. package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
  65. package/dist/voice/providers/index.js +3 -0
  66. package/dist/voice/voice-prompt-builder.js +26 -17
  67. package/dist/voice/voice-prompt-builder.test.js +16 -1
  68. package/docs/configuration.md +4 -9
  69. package/docs/official-docs.md +6 -9
  70. package/docs/runtime-switching.md +1 -1
  71. package/package.json +1 -1
  72. package/dist/voice/audio-pipeline.test.js +0 -619
  73. package/dist/voice/stt-deepgram.js +0 -154
  74. package/dist/voice/stt-deepgram.test.js +0 -275
  75. package/dist/voice/stt-factory.js +0 -42
  76. package/dist/voice/stt-factory.test.js +0 -45
  77. package/dist/voice/stt-openai.js +0 -156
  78. package/dist/voice/stt-openai.test.js +0 -281
  79. package/dist/voice/tts-cartesia.js +0 -169
  80. package/dist/voice/tts-cartesia.test.js +0 -228
  81. package/dist/voice/tts-deepgram.js +0 -84
  82. package/dist/voice/tts-deepgram.test.js +0 -220
  83. package/dist/voice/tts-factory.js +0 -52
  84. package/dist/voice/tts-factory.test.js +0 -53
  85. package/dist/voice/tts-openai.js +0 -70
  86. package/dist/voice/tts-openai.test.js +0 -138
  87. package/dist/voice/types.test.js +0 -84
@@ -0,0 +1,267 @@
1
+ /**
2
+ * GeminiLiveResponder — Discord audio output bridge for the Gemini Live voice path.
3
+ *
4
+ * Phase 1.2: receives audio/text events from GeminiLiveProvider, upsamples
5
+ * audio to Discord's 48 kHz stereo format, manages an AudioPlayer + PassThrough
6
+ * stream pipeline, and handles barge-in via Gemini's interrupted/turn_complete events.
7
+ */
8
+ import { PassThrough } from 'node:stream';
9
+ import { createAudioPlayer, createAudioResource, AudioPlayerStatus, StreamType, } from '@discordjs/voice';
10
+ import { upsampleToDiscord } from '../voice-responder.js';
11
+ // Gemini Live returns 24 kHz mono PCM s16le; the two constants below describe that source format
12
+ const GEMINI_OUTPUT_RATE = 24_000; // source sample rate in Hz, handed to upsampleToDiscord
13
+ const GEMINI_OUTPUT_CHANNELS = 1; // source channel count (mono), handed to upsampleToDiscord
14
+ const TRANSCRIPT_FLUSH_DELAY_MS = 50; // setTimeout delay before the accumulated transcript is passed to onBotResponse
15
+ // ---------------------------------------------------------------------------
16
+ // Responder
17
+ // ---------------------------------------------------------------------------
18
/**
 * Routes events from a Gemini Live provider into a Discord voice connection.
 *
 * Audio chunks are upsampled via `upsampleToDiscord` and written into a
 * PassThrough stream that backs the current AudioPlayer resource. Text chunks
 * are accumulated into a transcript that is delivered to `onBotResponse`
 * shortly after the provider reports `turn_complete`. Interruption, session
 * rotation and reconnect events discard whatever is currently playing.
 *
 * Constructor options (`opts`):
 *  - `log`: logger exposing `info`, `warn` and `error`.
 *  - `connection`: object exposing `subscribe(player)`.
 *  - `provider`: object exposing `onEvent(handler)`.
 *  - `onBotResponse(text)`: optional, receives the finished bot transcript.
 *  - `onInputTranscript(text)`: optional, receives input transcript text.
 *  - `onToolCall(calls)`: optional, receives the provider's function calls.
 *  - `onSessionTerminated()`: optional, called when reconnecting has failed.
 *  - `onFallbackRecommended(reason)`: optional.
 *  - `onTokenWarning(estimatedTokens, threshold)`: optional.
 *  - `createPlayer()`: optional player factory; defaults to `createAudioPlayer`.
 *
 * Exceptions thrown by any of the optional callbacks are caught and logged.
 */
export class GeminiLiveResponder {
    log;
    connection;
    provider;
    onBotResponse;
    onInputTranscript;
    onToolCall;
    onSessionTerminated;
    onFallbackRecommended;
    onTokenWarning;
    playerFactory;
    player = null;
    // Value returned by connection.subscribe(); kept so destroy() can detach the player.
    subscription = null;
    // PassThrough feeding the audio resource of the turn currently being played.
    stream = null;
    // Text accumulated for the current turn.
    transcript = '';
    transcriptFlushTimer = null;
    // True between `turn_complete` and the moment the transcript is flushed.
    turnCompletePending = false;
    started = false;

    constructor(opts) {
        this.log = opts.log;
        this.connection = opts.connection;
        this.provider = opts.provider;
        this.onBotResponse = opts.onBotResponse;
        this.onInputTranscript = opts.onInputTranscript;
        this.onToolCall = opts.onToolCall;
        this.onSessionTerminated = opts.onSessionTerminated;
        this.onFallbackRecommended = opts.onFallbackRecommended;
        this.onTokenWarning = opts.onTokenWarning;
        this.playerFactory = opts.createPlayer ?? (() => createAudioPlayer());
    }

    /**
     * Create the player, subscribe it to the connection, and listen for
     * provider events. Calling this while already started is a no-op.
     */
    start() {
        if (this.started)
            return;
        this.started = true;
        this.player = this.playerFactory();
        const subscription = this.connection.subscribe(this.player);
        this.subscription = subscription ?? null;
        this.log.info({ subscribed: !!subscription }, 'gemini-live-responder: player subscription result');
        this.player.on('stateChange', (oldState, newState) => {
            this.log.info({ from: oldState.status, to: newState.status }, 'gemini-live-responder: player state change');
        });
        this.player.on('error', (err) => {
            this.log.error({ err }, 'gemini-live-responder: audio player error');
        });
        this.provider.onEvent((event) => this.handleEvent(event));
    }

    /** Whether the bot is audibly speaking (player status Playing or Buffering). */
    get isPlaying() {
        if (!this.player)
            return false;
        const status = this.player.state.status;
        return status === AudioPlayerStatus.Playing || status === AudioPlayerStatus.Buffering;
    }

    /**
     * Stop playback, tear down the current stream, cancel any pending
     * transcript flush and discard the accumulated transcript.
     */
    stop() {
        this.destroyStream();
        this.cancelTranscriptFlush();
        this.player?.stop();
        this.transcript = '';
        this.turnCompletePending = false;
    }

    /**
     * Stop and release all resources: the player is detached from the
     * connection and dropped. `start()` may be called again afterwards.
     */
    destroy() {
        this.stop();
        // Detach the player from the connection so it is not kept alive there.
        // The optional call tolerates connection doubles whose subscribe()
        // result has no unsubscribe method.
        this.subscription?.unsubscribe?.();
        this.subscription = null;
        this.started = false;
        this.player = null;
    }

    // -----------------------------------------------------------------------
    // Internal
    // -----------------------------------------------------------------------

    /** Dispatch one provider event to its handler based on `event.type`. */
    handleEvent(event) {
        switch (event.type) {
            case 'audio':
                this.handleAudio(event.data);
                break;
            case 'text':
                this.transcript += event.text;
                // Text that arrives after turn_complete restarts the flush
                // timer so the late chunk is included in the delivered text.
                if (this.turnCompletePending) {
                    this.scheduleTranscriptFlush();
                }
                break;
            case 'input_transcript':
                this.handleInputTranscript(event.text);
                break;
            case 'interrupted':
                this.handleInterrupted();
                break;
            case 'turn_complete':
                this.handleTurnComplete();
                break;
            case 'tool_call':
                this.handleToolCall(event.functionCalls);
                break;
            case 'session_rotating':
                this.handleSessionRotating();
                break;
            case 'reconnecting':
                this.handleReconnecting(event.attempt, event.maxRetries, event.hasResumeHandle);
                break;
            case 'reconnected':
                this.log.info({ attempt: event.attempt }, 'gemini-live-responder: session reconnected');
                break;
            case 'reconnect_failed':
                this.handleReconnectFailed(event.attempts);
                break;
            case 'token_warning':
                this.handleTokenWarning(event.estimatedTokens, event.threshold);
                break;
            case 'fallback_recommended':
                this.handleFallbackRecommended(event.reason);
                break;
            default:
                // setup_complete, error — not handled here
                break;
        }
    }

    /**
     * Queue one chunk of provider audio for playback.
     *
     * @param data audio chunk from the provider (presumably a Buffer of
     *   24 kHz mono PCM, per the module constants — confirm against the provider).
     */
    handleAudio(data) {
        // The provider listener outlives destroy(), so audio can arrive while
        // there is no player. Drop it instead of dereferencing null.
        if (!this.player)
            return;
        // Lazily create the stream + resource on the first audio chunk of a turn
        if (!this.stream) {
            const turnStream = new PassThrough();
            const resource = createAudioResource(turnStream, {
                inputType: StreamType.Raw,
            });
            this.player.play(resource);
            // Publish the stream only once playback is wired up, so a failed
            // setup cannot leave later chunks piling up in an unread stream.
            this.stream = turnStream;
            this.log.info({}, 'gemini-live-responder: streaming playback started');
        }
        const upsampled = upsampleToDiscord(data, GEMINI_OUTPUT_RATE, GEMINI_OUTPUT_CHANNELS);
        this.stream.write(upsampled);
    }

    /** Barge-in: drop the current playback and the partial transcript. */
    handleInterrupted() {
        this.log.info({}, 'gemini-live-responder: interrupted — stopping playback');
        this.stop();
    }

    /** Forward input transcript text to `onInputTranscript`, if provided. */
    handleInputTranscript(text) {
        if (this.onInputTranscript) {
            try {
                this.onInputTranscript(text);
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: onInputTranscript callback error');
            }
        }
    }

    /** Log the requested function calls and forward them to `onToolCall`, if provided. */
    handleToolCall(calls) {
        this.log.info({ count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live-responder: tool call received');
        if (this.onToolCall) {
            try {
                this.onToolCall(calls);
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: onToolCall callback error');
            }
        }
    }

    /** Finish the turn: close the audio stream and schedule the transcript flush. */
    handleTurnComplete() {
        this.log.info({}, 'gemini-live-responder: turn complete');
        // End the stream gracefully so remaining buffered audio plays out
        if (this.stream) {
            this.stream.end();
            this.stream = null;
        }
        // Deliver the transcript after a short delay (see scheduleTranscriptFlush)
        this.turnCompletePending = true;
        this.scheduleTranscriptFlush();
    }

    /** Planned session rotation: discard current playback and transcript. */
    handleSessionRotating() {
        this.log.info({}, 'gemini-live-responder: planned session rotation — pausing playback');
        this.stop();
    }

    /** Reconnect attempt in progress: discard current playback and transcript. */
    handleReconnecting(attempt, maxRetries, hasResumeHandle) {
        this.log.info({ attempt, maxRetries, hasResumeHandle }, 'gemini-live-responder: session reconnecting — pausing playback');
        this.stop();
    }

    /** All reconnect attempts failed: stop playback and notify `onSessionTerminated`. */
    handleReconnectFailed(attempts) {
        this.log.error({ attempts }, 'gemini-live-responder: session terminally failed — all reconnect retries exhausted');
        this.stop();
        if (this.onSessionTerminated) {
            try {
                this.onSessionTerminated();
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: onSessionTerminated callback error');
            }
        }
    }

    /** Log the token warning and forward it to `onTokenWarning`, if provided. */
    handleTokenWarning(estimatedTokens, threshold) {
        this.log.warn({ estimatedTokens, threshold }, 'gemini-live-responder: token usage approaching context window limit');
        if (this.onTokenWarning) {
            try {
                this.onTokenWarning(estimatedTokens, threshold);
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: onTokenWarning callback error');
            }
        }
    }

    /** Log the fallback recommendation and forward it to `onFallbackRecommended`, if provided. */
    handleFallbackRecommended(reason) {
        this.log.warn({ reason }, 'gemini-live-responder: fallback to pipeline voice mode recommended');
        if (this.onFallbackRecommended) {
            try {
                this.onFallbackRecommended(reason);
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: onFallbackRecommended callback error');
            }
        }
    }

    /** Destroy the current audio stream (if any) without letting buffered audio play out. */
    destroyStream() {
        if (this.stream) {
            this.stream.destroy();
            this.stream = null;
        }
    }

    /**
     * (Re)arm the timer that delivers the accumulated transcript to
     * `onBotResponse` after TRANSCRIPT_FLUSH_DELAY_MS. Nothing is delivered if
     * the turn was reset in the meantime or the transcript is empty.
     */
    scheduleTranscriptFlush() {
        this.cancelTranscriptFlush();
        this.transcriptFlushTimer = setTimeout(() => {
            this.transcriptFlushTimer = null;
            if (!this.turnCompletePending)
                return;
            const text = this.transcript;
            this.transcript = '';
            this.turnCompletePending = false;
            if (!text)
                return;
            try {
                this.onBotResponse?.(text);
            }
            catch (err) {
                this.log.warn({ err }, 'gemini-live-responder: onBotResponse callback error');
            }
        }, TRANSCRIPT_FLUSH_DELAY_MS);
    }

    /** Cancel a pending transcript flush, if one is scheduled. */
    cancelTranscriptFlush() {
        if (this.transcriptFlushTimer) {
            clearTimeout(this.transcriptFlushTimer);
            this.transcriptFlushTimer = null;
        }
    }
}