discoclaw 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/.context/voice.md +30 -2
  2. package/.env.example +6 -0
  3. package/dist/cli/dashboard.js +7 -1
  4. package/dist/config.js +7 -0
  5. package/dist/cron/executor.js +72 -1
  6. package/dist/dashboard/api/metrics.js +7 -0
  7. package/dist/dashboard/api/metrics.test.js +16 -0
  8. package/dist/dashboard/api/traces.js +14 -0
  9. package/dist/dashboard/api/traces.test.js +40 -0
  10. package/dist/dashboard/page.js +187 -8
  11. package/dist/dashboard/server.js +81 -14
  12. package/dist/dashboard/server.test.js +120 -4
  13. package/dist/discord/deferred-runner.js +306 -219
  14. package/dist/discord/message-coordinator.js +1 -28
  15. package/dist/discord/reaction-handler.js +81 -3
  16. package/dist/index.js +15 -1
  17. package/dist/observability/trace-store.js +56 -0
  18. package/dist/observability/trace-utils.js +31 -0
  19. package/dist/runtime/codex-cli.js +3 -2
  20. package/dist/runtime/codex-cli.test.js +33 -0
  21. package/dist/runtime/model-tiers.js +1 -1
  22. package/dist/runtime/model-tiers.test.js +9 -0
  23. package/dist/runtime/openai-tool-schemas.js +17 -0
  24. package/dist/voice/audio-pipeline.js +246 -6
  25. package/dist/voice/audio-pipeline.test.js +481 -0
  26. package/dist/voice/audio-receiver.js +8 -0
  27. package/dist/voice/audio-receiver.test.js +16 -0
  28. package/dist/voice/conversation-buffer.js +16 -6
  29. package/dist/voice/providers/gemini-live-provider.js +481 -0
  30. package/dist/voice/providers/gemini-live-provider.test.js +834 -0
  31. package/dist/voice/providers/gemini-live-responder.js +267 -0
  32. package/dist/voice/providers/gemini-live-responder.test.js +615 -0
  33. package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
  34. package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
  35. package/dist/voice/providers/gemini-live-types.js +32 -0
  36. package/dist/voice/providers/gemini-tool-mapper.js +91 -0
  37. package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
  38. package/dist/voice/providers/index.js +3 -0
  39. package/dist/voice/types.test.js +6 -0
  40. package/dist/voice/voice-prompt-builder.js +26 -17
  41. package/dist/voice/voice-prompt-builder.test.js +16 -1
  42. package/package.json +1 -1
@@ -12,6 +12,11 @@ import { createSttProvider } from './stt-factory.js';
12
12
  import { createTtsProvider } from './tts-factory.js';
13
13
  import { VoiceResponder } from './voice-responder.js';
14
14
  import { ConversationBuffer } from './conversation-buffer.js';
15
+ import { GeminiLiveProvider } from './providers/gemini-live-provider.js';
16
+ import { GeminiLiveResponder } from './providers/gemini-live-responder.js';
17
+ import { DEFAULT_GEMINI_LIVE_MODEL, normalizeGeminiLiveModel, supportsGeminiLiveAsyncFunctionCalling, } from './providers/gemini-live-types.js';
18
+ import { buildGeminiToolDeclarations, buildToolSchemas, OPENAI_TO_DISCO_NAME } from '../runtime/openai-tool-schemas.js';
19
+ import { executeToolCall } from '../runtime/openai-tool-exec.js';
15
20
  // ---------------------------------------------------------------------------
16
21
  // AudioPipelineManager
17
22
  // ---------------------------------------------------------------------------
@@ -31,6 +36,13 @@ export class AudioPipelineManager {
31
36
  transcriptMirror;
32
37
  botDisplayName;
33
38
  backfill;
39
+ buildGeminiSystemInstruction;
40
+ voiceProvider;
41
+ geminiApiKey;
42
+ enabledTools;
43
+ silentTools;
44
+ sessionRotationMs;
45
+ onFallbackTriggered;
34
46
  pipelines = new Map();
35
47
  /** Re-entrancy guard: VoiceConnection.subscribe() can synchronously fire stateChange→Ready. */
36
48
  starting = new Set();
@@ -50,6 +62,14 @@ export class AudioPipelineManager {
50
62
  this.transcriptMirror = opts.transcriptMirror;
51
63
  this.botDisplayName = opts.botDisplayName ?? 'Bot';
52
64
  this.backfill = opts.backfill;
65
+ this.buildGeminiSystemInstruction = opts.buildGeminiSystemInstruction;
66
+ this.voiceProvider = opts.voiceProvider ?? 'pipeline';
67
+ this.geminiApiKey = opts.geminiApiKey;
68
+ this.enabledTools = opts.enabledTools ?? [];
69
+ this.silentTools = new Set(opts.silentTools ?? []);
70
+ this.sessionRotationMs = opts.sessionRotationMs;
71
+ this.onFallbackTriggered = opts.onFallbackTriggered;
72
+ this.log.info({ voiceProvider: this.voiceProvider }, 'audio pipeline manager initialized');
53
73
  }
54
74
  /**
55
75
  * Attach to a VoiceConnection and auto-manage the audio pipeline
@@ -67,8 +87,8 @@ export class AudioPipelineManager {
67
87
  }
68
88
  });
69
89
  }
70
- /** Start the audio receive pipeline for a guild. */
71
- async startPipeline(guildId, connection) {
90
+ /** Start the audio receive pipeline for a guild. Pass `forceMode` to override the configured provider (used during fallback). */
91
+ async startPipeline(guildId, connection, forceMode) {
72
92
  // Re-entrancy guard: VoiceConnection.subscribe() (called when wiring the
73
93
  // AudioPlayer) synchronously fires a stateChange→Ready event, which would
74
94
  // re-invoke startPipeline and recurse infinitely.
@@ -80,7 +100,175 @@ export class AudioPipelineManager {
80
100
  this.log.info({ guildId }, 'stopping existing pipeline before restart');
81
101
  await this.stopPipeline(guildId);
82
102
  }
103
+ const effectiveMode = forceMode ?? this.voiceProvider;
83
104
  try {
105
+ // ----- gemini-live mode: skip STT/TTS, use GeminiLiveProvider directly -----
106
+ if (effectiveMode === 'gemini-live') {
107
+ const apiKey = this.geminiApiKey;
108
+ if (!apiKey)
109
+ throw new Error('geminiApiKey is required for gemini-live voice provider');
110
+ const buffer = new ConversationBuffer();
111
+ if (this.backfill) {
112
+ try {
113
+ const turns = await this.backfill();
114
+ buffer.backfill(turns);
115
+ this.log.info({ guildId, turns: turns.length }, 'gemini-live conversation buffer backfilled');
116
+ }
117
+ catch (err) {
118
+ this.log.warn({ guildId, err }, 'gemini-live conversation backfill failed — proceeding with empty history');
119
+ }
120
+ }
121
+ const geminiLiveModel = normalizeGeminiLiveModel(this.runtimeModel) ?? DEFAULT_GEMINI_LIVE_MODEL;
122
+ const supportsAsyncFunctionCalling = supportsGeminiLiveAsyncFunctionCalling(geminiLiveModel);
123
+ const tools = buildGeminiToolDeclarations(this.enabledTools, { nonBlocking: supportsAsyncFunctionCalling });
124
+ const systemInstruction = await this.buildGeminiSystemInstruction?.();
125
+ const initialHistory = toGeminiLiveHistoryTurns(buffer.toTurns());
126
+ const provider = new GeminiLiveProvider({
127
+ apiKey,
128
+ log: this.log,
129
+ model: geminiLiveModel,
130
+ systemInstruction,
131
+ responseModalities: ['AUDIO'],
132
+ tools,
133
+ initialHistoryInClientContent: initialHistory.length > 0,
134
+ sessionRotationMs: this.sessionRotationMs,
135
+ });
136
+ await provider.connect();
137
+ if (initialHistory.length > 0) {
138
+ provider.sendInitialHistory(initialHistory);
139
+ this.log.info({ guildId, turns: initialHistory.length }, 'gemini-live conversation history seeded');
140
+ }
141
+ if (!supportsAsyncFunctionCalling && this.silentTools.size > 0) {
142
+ this.log.info({ guildId, model: geminiLiveModel, count: this.silentTools.size }, 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
143
+ }
144
+ const mirror = this.transcriptMirror;
145
+ const botName = this.botDisplayName;
146
+ let latestInputTranscript;
147
+ const responder = new GeminiLiveResponder({
148
+ log: this.log,
149
+ connection,
150
+ provider,
151
+ onBotResponse: mirror
152
+ ? (text) => {
153
+ if (latestInputTranscript && text.trim()) {
154
+ buffer.push(latestInputTranscript, text);
155
+ latestInputTranscript = undefined;
156
+ }
157
+ mirror.postBotResponse(botName, text).catch((err) => {
158
+ this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
159
+ });
160
+ }
161
+ : (text) => {
162
+ if (latestInputTranscript && text.trim()) {
163
+ buffer.push(latestInputTranscript, text);
164
+ latestInputTranscript = undefined;
165
+ }
166
+ },
167
+ onInputTranscript: mirror
168
+ ? (text) => {
169
+ if (text.trim())
170
+ latestInputTranscript = text.trim();
171
+ mirror.postUserTranscription('User', text).catch((err) => {
172
+ this.log.warn({ guildId, err }, 'transcript-mirror: failed to post user transcription');
173
+ });
174
+ }
175
+ : (text) => {
176
+ if (text.trim())
177
+ latestInputTranscript = text.trim();
178
+ },
179
+ onSessionTerminated: () => {
180
+ this.log.error({ guildId }, 'gemini-live session terminally failed — no fallback (fallback disabled)');
181
+ },
182
+ onFallbackRecommended: (reason) => {
183
+ this.log.warn({ guildId, reason }, 'gemini-live: fallback recommended but fallback is disabled');
184
+ },
185
+ onTokenWarning: (estimatedTokens, threshold) => {
186
+ this.log.warn({ guildId, estimatedTokens, threshold }, 'gemini-live: token usage approaching context window limit');
187
+ },
188
+ onToolCall: tools
189
+ ? (calls) => {
190
+ this.log.info({ guildId, count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live: tool call received — dispatching');
191
+ const allowedRoots = this.runtimeCwd ? [this.runtimeCwd] : [];
192
+ const allowedToolNames = new Set(buildToolSchemas(this.enabledTools).map((t) => t.function.name));
193
+ const logFn = (msg) => this.log.info({ guildId }, msg);
194
+ const execOpts = { enableHybridPipeline: false, allowedToolNames };
195
+ // Gemini 3.1 Live only supports synchronous function calling.
196
+ // Gemini 2.5 Live can opt into NON_BLOCKING declarations and scheduled responses.
197
+ void (async () => {
198
+ const results = await Promise.all(calls.map(async (call) => {
199
+ const scheduling = supportsAsyncFunctionCalling
200
+ ? (this.isSilentTool(call.name) ? 'SILENT' : 'INTERRUPT')
201
+ : undefined;
202
+ try {
203
+ const res = await executeToolCall(call.name, call.args, allowedRoots, logFn, execOpts);
204
+ return { id: call.id, name: call.name, output: res.result, scheduling };
205
+ }
206
+ catch (err) {
207
+ const msg = err instanceof Error ? err.message : String(err);
208
+ return { id: call.id, name: call.name, output: `Error: ${msg}`, scheduling };
209
+ }
210
+ }));
211
+ const silentCount = supportsAsyncFunctionCalling
212
+ ? results.filter((r) => r.scheduling === 'SILENT').length
213
+ : 0;
214
+ if (silentCount > 0) {
215
+ this.log.info({ guildId, count: silentCount }, 'gemini-live: SILENT tool execution complete — results scheduled silently');
216
+ }
217
+ try {
218
+ provider.sendToolResponse(results);
219
+ }
220
+ catch (err) {
221
+ this.log.warn({ guildId, err }, 'gemini-live: sendToolResponse failed (provider likely disconnected)');
222
+ }
223
+ })();
224
+ }
225
+ : undefined,
226
+ });
227
+ responder.start();
228
+ // SttProvider shim: bridges AudioReceiver frames to GeminiLiveProvider.sendAudio
229
+ const sttShim = {
230
+ start: async () => { },
231
+ stop: async () => { },
232
+ onTranscription: () => { },
233
+ feedAudio: (frame) => {
234
+ try {
235
+ provider.sendAudio(frame.buffer);
236
+ }
237
+ catch (err) {
238
+ this.log.warn({ guildId, err }, 'gemini-live: sendAudio error (non-fatal)');
239
+ }
240
+ },
241
+ };
242
+ const receiver = new AudioReceiver({
243
+ connection,
244
+ allowedUserIds: this.allowedUserIds,
245
+ sttProvider: sttShim,
246
+ log: this.log,
247
+ createDecoder: this.createDecoder,
248
+ onUserSpeaking: () => { },
249
+ onUserSilence: () => {
250
+ try {
251
+ provider.sendAudioStreamEnd();
252
+ }
253
+ catch (err) {
254
+ this.log.warn({ guildId, err }, 'gemini-live: sendAudioStreamEnd error (non-fatal)');
255
+ }
256
+ },
257
+ });
258
+ receiver.start();
259
+ this.pipelines.set(guildId, {
260
+ connection,
261
+ sttProvider: sttShim,
262
+ receiver,
263
+ buffer,
264
+ geminiProvider: provider,
265
+ geminiResponder: responder,
266
+ mode: 'gemini-live',
267
+ });
268
+ this.log.info({ guildId }, 'audio pipeline started (gemini-live)');
269
+ return;
270
+ }
271
+ // ----- default pipeline mode: STT/TTS/VoiceResponder -----
84
272
  const sttProvider = this.createStt(this.voiceConfig, this.log);
85
273
  const mirror = this.transcriptMirror;
86
274
  // Create conversation buffer and backfill history if available
@@ -166,11 +354,15 @@ export class AudioPipelineManager {
166
354
  },
167
355
  });
168
356
  receiver.start();
169
- this.pipelines.set(guildId, { connection, sttProvider, receiver, responder, buffer });
170
- this.log.info({ guildId }, 'audio pipeline started');
357
+ this.pipelines.set(guildId, { connection, sttProvider, receiver, responder, buffer, mode: 'pipeline' });
358
+ this.log.info({ guildId, mode: effectiveMode }, 'audio pipeline started');
171
359
  }
172
360
  catch (err) {
173
361
  this.log.error({ guildId, err }, 'failed to start audio pipeline');
362
+ // Fallback disabled — gemini-live must succeed or the pipeline stays down
363
+ if (effectiveMode === 'gemini-live') {
364
+ this.log.error({ guildId }, 'gemini-live: connection failed — no fallback (fallback disabled)');
365
+ }
174
366
  }
175
367
  finally {
176
368
  this.starting.delete(guildId);
@@ -182,6 +374,10 @@ export class AudioPipelineManager {
182
374
  if (!pipeline)
183
375
  return;
184
376
  this.pipelines.delete(guildId);
377
+ pipeline.geminiResponder?.destroy();
378
+ if (pipeline.geminiProvider) {
379
+ await pipeline.geminiProvider.disconnect();
380
+ }
185
381
  pipeline.responder?.destroy();
186
382
  pipeline.receiver.stop();
187
383
  try {
@@ -205,16 +401,52 @@ export class AudioPipelineManager {
205
401
  get activePipelineCount() {
206
402
  return this.pipelines.size;
207
403
  }
404
+ /** Configured voice provider mode ('pipeline' or 'gemini-live'). */
405
+ get activeVoiceProvider() {
406
+ return this.voiceProvider;
407
+ }
408
+ /** Active mode for a specific guild (may differ from configured mode during fallback). */
409
+ pipelineMode(guildId) {
410
+ return this.pipelines.get(guildId)?.mode;
411
+ }
208
412
  /** Current Deepgram TTS voice model name. */
209
413
  get ttsVoice() {
210
414
  return this.voiceConfig.deepgramTtsVoice;
211
415
  }
416
+ /**
417
+ * Fall back from gemini-live to the standard pipeline for a guild.
418
+ * Stops the current gemini-live session and starts a standard STT/AI/TTS pipeline.
419
+ * No-op if no pipeline exists or the guild is already in standard mode.
420
+ */
421
+ async fallbackToPipeline(guildId, connection) {
422
+ const pipeline = this.pipelines.get(guildId);
423
+ if (!pipeline || pipeline.mode !== 'gemini-live')
424
+ return;
425
+ this.log.warn({ guildId }, 'gemini-live: initiating fallback to standard pipeline');
426
+ await this.stopPipeline(guildId);
427
+ await this.startPipeline(guildId, connection, 'pipeline');
428
+ if (this.hasPipeline(guildId)) {
429
+ this.log.info({ guildId }, 'gemini-live: fallback to standard pipeline succeeded');
430
+ this.onFallbackTriggered?.(guildId, 'pipeline');
431
+ }
432
+ else {
433
+ this.log.error({ guildId }, 'gemini-live: fallback to standard pipeline also failed — guild has no active pipeline');
434
+ }
435
+ }
436
+ isSilentTool(toolName) {
437
+ return this.silentTools.has(toolName) || this.silentTools.has(OPENAI_TO_DISCO_NAME[toolName] ?? toolName);
438
+ }
212
439
  /**
213
440
  * Update the Deepgram TTS voice and restart all active pipelines so the
214
- * new voice takes effect immediately.
215
- * @returns The number of pipelines that were restarted.
441
+ * new voice takes effect immediately. No-op in gemini-live mode (TTS is
442
+ * handled server-side).
443
+ * @returns The number of pipelines that were restarted (0 in gemini-live mode).
216
444
  */
217
445
  async setTtsVoice(voice) {
446
+ if (this.voiceProvider === 'gemini-live') {
447
+ this.log.info({ voice }, 'TTS voice change ignored — gemini-live mode uses server-side TTS');
448
+ return 0;
449
+ }
218
450
  this.voiceConfig = { ...this.voiceConfig, deepgramTtsVoice: voice };
219
451
  this.log.info({ voice }, 'TTS voice updated — restarting active pipelines');
220
452
  const entries = [...this.pipelines.entries()];
@@ -222,3 +454,11 @@ export class AudioPipelineManager {
222
454
  return entries.length;
223
455
  }
224
456
  }
457
/**
 * Flatten buffered conversation turns into the alternating user/model
 * content list used to seed a Gemini Live session's initial history.
 *
 * Each input turn yields two entries, in order: a 'user' entry carrying
 * `turn.user` and a 'model' entry carrying `turn.assistant`.
 *
 * @param {Array<{user: string, assistant: string}> | null | undefined} turns
 *   Conversation turns; a nullish value is treated as an empty history
 *   instead of throwing.
 * @returns {Array<{role: string, parts: Array<{text: string}>}>}
 *   A new array with two entries per input turn.
 */
function toGeminiLiveHistoryTurns(turns) {
    return (turns ?? []).flatMap(({ user, assistant }) => [
        { role: 'user', parts: [{ text: user }] },
        { role: 'model', parts: [{ text: assistant }] },
    ]);
}