discoclaw 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +6 -0
- package/dist/cli/dashboard.js +7 -1
- package/dist/config.js +7 -0
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +81 -14
- package/dist/dashboard/server.test.js +120 -4
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/message-coordinator.js +1 -28
- package/dist/discord/reaction-handler.js +81 -3
- package/dist/index.js +15 -1
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/voice/audio-pipeline.js +246 -6
- package/dist/voice/audio-pipeline.test.js +481 -0
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/types.test.js +6 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/package.json +1 -1
|
@@ -12,6 +12,11 @@ import { createSttProvider } from './stt-factory.js';
|
|
|
12
12
|
import { createTtsProvider } from './tts-factory.js';
|
|
13
13
|
import { VoiceResponder } from './voice-responder.js';
|
|
14
14
|
import { ConversationBuffer } from './conversation-buffer.js';
|
|
15
|
+
import { GeminiLiveProvider } from './providers/gemini-live-provider.js';
|
|
16
|
+
import { GeminiLiveResponder } from './providers/gemini-live-responder.js';
|
|
17
|
+
import { DEFAULT_GEMINI_LIVE_MODEL, normalizeGeminiLiveModel, supportsGeminiLiveAsyncFunctionCalling, } from './providers/gemini-live-types.js';
|
|
18
|
+
import { buildGeminiToolDeclarations, buildToolSchemas, OPENAI_TO_DISCO_NAME } from '../runtime/openai-tool-schemas.js';
|
|
19
|
+
import { executeToolCall } from '../runtime/openai-tool-exec.js';
|
|
15
20
|
// ---------------------------------------------------------------------------
|
|
16
21
|
// AudioPipelineManager
|
|
17
22
|
// ---------------------------------------------------------------------------
|
|
@@ -31,6 +36,13 @@ export class AudioPipelineManager {
|
|
|
31
36
|
transcriptMirror;
|
|
32
37
|
botDisplayName;
|
|
33
38
|
backfill;
|
|
39
|
+
buildGeminiSystemInstruction;
|
|
40
|
+
voiceProvider;
|
|
41
|
+
geminiApiKey;
|
|
42
|
+
enabledTools;
|
|
43
|
+
silentTools;
|
|
44
|
+
sessionRotationMs;
|
|
45
|
+
onFallbackTriggered;
|
|
34
46
|
pipelines = new Map();
|
|
35
47
|
/** Re-entrancy guard: VoiceConnection.subscribe() can synchronously fire stateChange→Ready. */
|
|
36
48
|
starting = new Set();
|
|
@@ -50,6 +62,14 @@ export class AudioPipelineManager {
|
|
|
50
62
|
this.transcriptMirror = opts.transcriptMirror;
|
|
51
63
|
this.botDisplayName = opts.botDisplayName ?? 'Bot';
|
|
52
64
|
this.backfill = opts.backfill;
|
|
65
|
+
this.buildGeminiSystemInstruction = opts.buildGeminiSystemInstruction;
|
|
66
|
+
this.voiceProvider = opts.voiceProvider ?? 'pipeline';
|
|
67
|
+
this.geminiApiKey = opts.geminiApiKey;
|
|
68
|
+
this.enabledTools = opts.enabledTools ?? [];
|
|
69
|
+
this.silentTools = new Set(opts.silentTools ?? []);
|
|
70
|
+
this.sessionRotationMs = opts.sessionRotationMs;
|
|
71
|
+
this.onFallbackTriggered = opts.onFallbackTriggered;
|
|
72
|
+
this.log.info({ voiceProvider: this.voiceProvider }, 'audio pipeline manager initialized');
|
|
53
73
|
}
|
|
54
74
|
/**
|
|
55
75
|
* Attach to a VoiceConnection and auto-manage the audio pipeline
|
|
@@ -67,8 +87,8 @@ export class AudioPipelineManager {
|
|
|
67
87
|
}
|
|
68
88
|
});
|
|
69
89
|
}
|
|
70
|
-
/** Start the audio receive pipeline for a guild. */
|
|
71
|
-
async startPipeline(guildId, connection) {
|
|
90
|
+
/** Start the audio receive pipeline for a guild. Pass `forceMode` to override the configured provider (used during fallback). */
|
|
91
|
+
async startPipeline(guildId, connection, forceMode) {
|
|
72
92
|
// Re-entrancy guard: VoiceConnection.subscribe() (called when wiring the
|
|
73
93
|
// AudioPlayer) synchronously fires a stateChange→Ready event, which would
|
|
74
94
|
// re-invoke startPipeline and recurse infinitely.
|
|
@@ -80,7 +100,175 @@ export class AudioPipelineManager {
|
|
|
80
100
|
this.log.info({ guildId }, 'stopping existing pipeline before restart');
|
|
81
101
|
await this.stopPipeline(guildId);
|
|
82
102
|
}
|
|
103
|
+
const effectiveMode = forceMode ?? this.voiceProvider;
|
|
83
104
|
try {
|
|
105
|
+
// ----- gemini-live mode: skip STT/TTS, use GeminiLiveProvider directly -----
|
|
106
|
+
if (effectiveMode === 'gemini-live') {
|
|
107
|
+
const apiKey = this.geminiApiKey;
|
|
108
|
+
if (!apiKey)
|
|
109
|
+
throw new Error('geminiApiKey is required for gemini-live voice provider');
|
|
110
|
+
const buffer = new ConversationBuffer();
|
|
111
|
+
if (this.backfill) {
|
|
112
|
+
try {
|
|
113
|
+
const turns = await this.backfill();
|
|
114
|
+
buffer.backfill(turns);
|
|
115
|
+
this.log.info({ guildId, turns: turns.length }, 'gemini-live conversation buffer backfilled');
|
|
116
|
+
}
|
|
117
|
+
catch (err) {
|
|
118
|
+
this.log.warn({ guildId, err }, 'gemini-live conversation backfill failed — proceeding with empty history');
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
const geminiLiveModel = normalizeGeminiLiveModel(this.runtimeModel) ?? DEFAULT_GEMINI_LIVE_MODEL;
|
|
122
|
+
const supportsAsyncFunctionCalling = supportsGeminiLiveAsyncFunctionCalling(geminiLiveModel);
|
|
123
|
+
const tools = buildGeminiToolDeclarations(this.enabledTools, { nonBlocking: supportsAsyncFunctionCalling });
|
|
124
|
+
const systemInstruction = await this.buildGeminiSystemInstruction?.();
|
|
125
|
+
const initialHistory = toGeminiLiveHistoryTurns(buffer.toTurns());
|
|
126
|
+
const provider = new GeminiLiveProvider({
|
|
127
|
+
apiKey,
|
|
128
|
+
log: this.log,
|
|
129
|
+
model: geminiLiveModel,
|
|
130
|
+
systemInstruction,
|
|
131
|
+
responseModalities: ['AUDIO'],
|
|
132
|
+
tools,
|
|
133
|
+
initialHistoryInClientContent: initialHistory.length > 0,
|
|
134
|
+
sessionRotationMs: this.sessionRotationMs,
|
|
135
|
+
});
|
|
136
|
+
await provider.connect();
|
|
137
|
+
if (initialHistory.length > 0) {
|
|
138
|
+
provider.sendInitialHistory(initialHistory);
|
|
139
|
+
this.log.info({ guildId, turns: initialHistory.length }, 'gemini-live conversation history seeded');
|
|
140
|
+
}
|
|
141
|
+
if (!supportsAsyncFunctionCalling && this.silentTools.size > 0) {
|
|
142
|
+
this.log.info({ guildId, model: geminiLiveModel, count: this.silentTools.size }, 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
|
|
143
|
+
}
|
|
144
|
+
const mirror = this.transcriptMirror;
|
|
145
|
+
const botName = this.botDisplayName;
|
|
146
|
+
let latestInputTranscript;
|
|
147
|
+
const responder = new GeminiLiveResponder({
|
|
148
|
+
log: this.log,
|
|
149
|
+
connection,
|
|
150
|
+
provider,
|
|
151
|
+
onBotResponse: mirror
|
|
152
|
+
? (text) => {
|
|
153
|
+
if (latestInputTranscript && text.trim()) {
|
|
154
|
+
buffer.push(latestInputTranscript, text);
|
|
155
|
+
latestInputTranscript = undefined;
|
|
156
|
+
}
|
|
157
|
+
mirror.postBotResponse(botName, text).catch((err) => {
|
|
158
|
+
this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
|
|
159
|
+
});
|
|
160
|
+
}
|
|
161
|
+
: (text) => {
|
|
162
|
+
if (latestInputTranscript && text.trim()) {
|
|
163
|
+
buffer.push(latestInputTranscript, text);
|
|
164
|
+
latestInputTranscript = undefined;
|
|
165
|
+
}
|
|
166
|
+
},
|
|
167
|
+
onInputTranscript: mirror
|
|
168
|
+
? (text) => {
|
|
169
|
+
if (text.trim())
|
|
170
|
+
latestInputTranscript = text.trim();
|
|
171
|
+
mirror.postUserTranscription('User', text).catch((err) => {
|
|
172
|
+
this.log.warn({ guildId, err }, 'transcript-mirror: failed to post user transcription');
|
|
173
|
+
});
|
|
174
|
+
}
|
|
175
|
+
: (text) => {
|
|
176
|
+
if (text.trim())
|
|
177
|
+
latestInputTranscript = text.trim();
|
|
178
|
+
},
|
|
179
|
+
onSessionTerminated: () => {
|
|
180
|
+
this.log.error({ guildId }, 'gemini-live session terminally failed — no fallback (fallback disabled)');
|
|
181
|
+
},
|
|
182
|
+
onFallbackRecommended: (reason) => {
|
|
183
|
+
this.log.warn({ guildId, reason }, 'gemini-live: fallback recommended but fallback is disabled');
|
|
184
|
+
},
|
|
185
|
+
onTokenWarning: (estimatedTokens, threshold) => {
|
|
186
|
+
this.log.warn({ guildId, estimatedTokens, threshold }, 'gemini-live: token usage approaching context window limit');
|
|
187
|
+
},
|
|
188
|
+
onToolCall: tools
|
|
189
|
+
? (calls) => {
|
|
190
|
+
this.log.info({ guildId, count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live: tool call received — dispatching');
|
|
191
|
+
const allowedRoots = this.runtimeCwd ? [this.runtimeCwd] : [];
|
|
192
|
+
const allowedToolNames = new Set(buildToolSchemas(this.enabledTools).map((t) => t.function.name));
|
|
193
|
+
const logFn = (msg) => this.log.info({ guildId }, msg);
|
|
194
|
+
const execOpts = { enableHybridPipeline: false, allowedToolNames };
|
|
195
|
+
// Gemini 3.1 Live only supports synchronous function calling.
|
|
196
|
+
// Gemini 2.5 Live can opt into NON_BLOCKING declarations and scheduled responses.
|
|
197
|
+
void (async () => {
|
|
198
|
+
const results = await Promise.all(calls.map(async (call) => {
|
|
199
|
+
const scheduling = supportsAsyncFunctionCalling
|
|
200
|
+
? (this.isSilentTool(call.name) ? 'SILENT' : 'INTERRUPT')
|
|
201
|
+
: undefined;
|
|
202
|
+
try {
|
|
203
|
+
const res = await executeToolCall(call.name, call.args, allowedRoots, logFn, execOpts);
|
|
204
|
+
return { id: call.id, name: call.name, output: res.result, scheduling };
|
|
205
|
+
}
|
|
206
|
+
catch (err) {
|
|
207
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
208
|
+
return { id: call.id, name: call.name, output: `Error: ${msg}`, scheduling };
|
|
209
|
+
}
|
|
210
|
+
}));
|
|
211
|
+
const silentCount = supportsAsyncFunctionCalling
|
|
212
|
+
? results.filter((r) => r.scheduling === 'SILENT').length
|
|
213
|
+
: 0;
|
|
214
|
+
if (silentCount > 0) {
|
|
215
|
+
this.log.info({ guildId, count: silentCount }, 'gemini-live: SILENT tool execution complete — results scheduled silently');
|
|
216
|
+
}
|
|
217
|
+
try {
|
|
218
|
+
provider.sendToolResponse(results);
|
|
219
|
+
}
|
|
220
|
+
catch (err) {
|
|
221
|
+
this.log.warn({ guildId, err }, 'gemini-live: sendToolResponse failed (provider likely disconnected)');
|
|
222
|
+
}
|
|
223
|
+
})();
|
|
224
|
+
}
|
|
225
|
+
: undefined,
|
|
226
|
+
});
|
|
227
|
+
responder.start();
|
|
228
|
+
// SttProvider shim: bridges AudioReceiver frames to GeminiLiveProvider.sendAudio
|
|
229
|
+
const sttShim = {
|
|
230
|
+
start: async () => { },
|
|
231
|
+
stop: async () => { },
|
|
232
|
+
onTranscription: () => { },
|
|
233
|
+
feedAudio: (frame) => {
|
|
234
|
+
try {
|
|
235
|
+
provider.sendAudio(frame.buffer);
|
|
236
|
+
}
|
|
237
|
+
catch (err) {
|
|
238
|
+
this.log.warn({ guildId, err }, 'gemini-live: sendAudio error (non-fatal)');
|
|
239
|
+
}
|
|
240
|
+
},
|
|
241
|
+
};
|
|
242
|
+
const receiver = new AudioReceiver({
|
|
243
|
+
connection,
|
|
244
|
+
allowedUserIds: this.allowedUserIds,
|
|
245
|
+
sttProvider: sttShim,
|
|
246
|
+
log: this.log,
|
|
247
|
+
createDecoder: this.createDecoder,
|
|
248
|
+
onUserSpeaking: () => { },
|
|
249
|
+
onUserSilence: () => {
|
|
250
|
+
try {
|
|
251
|
+
provider.sendAudioStreamEnd();
|
|
252
|
+
}
|
|
253
|
+
catch (err) {
|
|
254
|
+
this.log.warn({ guildId, err }, 'gemini-live: sendAudioStreamEnd error (non-fatal)');
|
|
255
|
+
}
|
|
256
|
+
},
|
|
257
|
+
});
|
|
258
|
+
receiver.start();
|
|
259
|
+
this.pipelines.set(guildId, {
|
|
260
|
+
connection,
|
|
261
|
+
sttProvider: sttShim,
|
|
262
|
+
receiver,
|
|
263
|
+
buffer,
|
|
264
|
+
geminiProvider: provider,
|
|
265
|
+
geminiResponder: responder,
|
|
266
|
+
mode: 'gemini-live',
|
|
267
|
+
});
|
|
268
|
+
this.log.info({ guildId }, 'audio pipeline started (gemini-live)');
|
|
269
|
+
return;
|
|
270
|
+
}
|
|
271
|
+
// ----- default pipeline mode: STT/TTS/VoiceResponder -----
|
|
84
272
|
const sttProvider = this.createStt(this.voiceConfig, this.log);
|
|
85
273
|
const mirror = this.transcriptMirror;
|
|
86
274
|
// Create conversation buffer and backfill history if available
|
|
@@ -166,11 +354,15 @@ export class AudioPipelineManager {
|
|
|
166
354
|
},
|
|
167
355
|
});
|
|
168
356
|
receiver.start();
|
|
169
|
-
this.pipelines.set(guildId, { connection, sttProvider, receiver, responder, buffer });
|
|
170
|
-
this.log.info({ guildId }, 'audio pipeline started');
|
|
357
|
+
this.pipelines.set(guildId, { connection, sttProvider, receiver, responder, buffer, mode: 'pipeline' });
|
|
358
|
+
this.log.info({ guildId, mode: effectiveMode }, 'audio pipeline started');
|
|
171
359
|
}
|
|
172
360
|
catch (err) {
|
|
173
361
|
this.log.error({ guildId, err }, 'failed to start audio pipeline');
|
|
362
|
+
// Fallback disabled — gemini-live must succeed or the pipeline stays down
|
|
363
|
+
if (effectiveMode === 'gemini-live') {
|
|
364
|
+
this.log.error({ guildId }, 'gemini-live: connection failed — no fallback (fallback disabled)');
|
|
365
|
+
}
|
|
174
366
|
}
|
|
175
367
|
finally {
|
|
176
368
|
this.starting.delete(guildId);
|
|
@@ -182,6 +374,10 @@ export class AudioPipelineManager {
|
|
|
182
374
|
if (!pipeline)
|
|
183
375
|
return;
|
|
184
376
|
this.pipelines.delete(guildId);
|
|
377
|
+
pipeline.geminiResponder?.destroy();
|
|
378
|
+
if (pipeline.geminiProvider) {
|
|
379
|
+
await pipeline.geminiProvider.disconnect();
|
|
380
|
+
}
|
|
185
381
|
pipeline.responder?.destroy();
|
|
186
382
|
pipeline.receiver.stop();
|
|
187
383
|
try {
|
|
@@ -205,16 +401,52 @@ export class AudioPipelineManager {
|
|
|
205
401
|
get activePipelineCount() {
|
|
206
402
|
return this.pipelines.size;
|
|
207
403
|
}
|
|
404
|
+
/** Configured voice provider mode ('pipeline' or 'gemini-live'). */
|
|
405
|
+
get activeVoiceProvider() {
|
|
406
|
+
return this.voiceProvider;
|
|
407
|
+
}
|
|
408
|
+
/** Active mode for a specific guild (may differ from configured mode during fallback). */
|
|
409
|
+
pipelineMode(guildId) {
|
|
410
|
+
return this.pipelines.get(guildId)?.mode;
|
|
411
|
+
}
|
|
208
412
|
/** Current Deepgram TTS voice model name. */
|
|
209
413
|
get ttsVoice() {
|
|
210
414
|
return this.voiceConfig.deepgramTtsVoice;
|
|
211
415
|
}
|
|
416
|
+
/**
|
|
417
|
+
* Fall back from gemini-live to the standard pipeline for a guild.
|
|
418
|
+
* Stops the current gemini-live session and starts a standard STT/AI/TTS pipeline.
|
|
419
|
+
* No-op if no pipeline exists or the guild is already in standard mode.
|
|
420
|
+
*/
|
|
421
|
+
async fallbackToPipeline(guildId, connection) {
|
|
422
|
+
const pipeline = this.pipelines.get(guildId);
|
|
423
|
+
if (!pipeline || pipeline.mode !== 'gemini-live')
|
|
424
|
+
return;
|
|
425
|
+
this.log.warn({ guildId }, 'gemini-live: initiating fallback to standard pipeline');
|
|
426
|
+
await this.stopPipeline(guildId);
|
|
427
|
+
await this.startPipeline(guildId, connection, 'pipeline');
|
|
428
|
+
if (this.hasPipeline(guildId)) {
|
|
429
|
+
this.log.info({ guildId }, 'gemini-live: fallback to standard pipeline succeeded');
|
|
430
|
+
this.onFallbackTriggered?.(guildId, 'pipeline');
|
|
431
|
+
}
|
|
432
|
+
else {
|
|
433
|
+
this.log.error({ guildId }, 'gemini-live: fallback to standard pipeline also failed — guild has no active pipeline');
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
isSilentTool(toolName) {
|
|
437
|
+
return this.silentTools.has(toolName) || this.silentTools.has(OPENAI_TO_DISCO_NAME[toolName] ?? toolName);
|
|
438
|
+
}
|
|
212
439
|
/**
|
|
213
440
|
* Update the Deepgram TTS voice and restart all active pipelines so the
|
|
214
|
-
* new voice takes effect immediately.
|
|
215
|
-
*
|
|
441
|
+
* new voice takes effect immediately. No-op in gemini-live mode (TTS is
|
|
442
|
+
* handled server-side).
|
|
443
|
+
* @returns The number of pipelines that were restarted (0 in gemini-live mode).
|
|
216
444
|
*/
|
|
217
445
|
async setTtsVoice(voice) {
|
|
446
|
+
if (this.voiceProvider === 'gemini-live') {
|
|
447
|
+
this.log.info({ voice }, 'TTS voice change ignored — gemini-live mode uses server-side TTS');
|
|
448
|
+
return 0;
|
|
449
|
+
}
|
|
218
450
|
this.voiceConfig = { ...this.voiceConfig, deepgramTtsVoice: voice };
|
|
219
451
|
this.log.info({ voice }, 'TTS voice updated — restarting active pipelines');
|
|
220
452
|
const entries = [...this.pipelines.entries()];
|
|
@@ -222,3 +454,11 @@ export class AudioPipelineManager {
|
|
|
222
454
|
return entries.length;
|
|
223
455
|
}
|
|
224
456
|
}
|
|
457
|
+
function toGeminiLiveHistoryTurns(turns) {
|
|
458
|
+
const history = [];
|
|
459
|
+
for (const turn of turns) {
|
|
460
|
+
history.push({ role: 'user', parts: [{ text: turn.user }] });
|
|
461
|
+
history.push({ role: 'model', parts: [{ text: turn.assistant }] });
|
|
462
|
+
}
|
|
463
|
+
return history;
|
|
464
|
+
}
|