discoclaw 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +7 -3
- package/.env.example.full +13 -32
- package/README.md +1 -1
- package/dist/cli/dashboard.js +7 -1
- package/dist/cli/dashboard.test.js +0 -4
- package/dist/cli/init-wizard.js +4 -8
- package/dist/cli/init-wizard.test.js +4 -10
- package/dist/config.js +5 -38
- package/dist/config.test.js +8 -72
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +82 -19
- package/dist/dashboard/server.test.js +123 -10
- package/dist/discord/actions.js +112 -6
- package/dist/discord/actions.test.js +117 -1
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/help-command.js +1 -1
- package/dist/discord/message-coordinator.js +4 -36
- package/dist/discord/models-command.js +1 -1
- package/dist/discord/reaction-handler.js +83 -5
- package/dist/discord/reaction-handler.test.js +55 -0
- package/dist/discord/verify-push.js +31 -36
- package/dist/discord/verify-push.test.js +34 -6
- package/dist/discord/voice-command.js +1 -31
- package/dist/discord/voice-command.test.js +21 -259
- package/dist/discord/voice-status-command.js +3 -22
- package/dist/discord/voice-status-command.test.js +16 -124
- package/dist/discord-followup.test.js +133 -0
- package/dist/health/config-doctor.js +5 -27
- package/dist/health/config-doctor.test.js +1 -4
- package/dist/index.js +15 -28
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/runtime-overrides.js +2 -3
- package/dist/runtime-overrides.test.js +27 -193
- package/dist/tasks/store.js +10 -6
- package/dist/tasks/store.test.js +44 -0
- package/dist/tasks/task-action-executor.test.js +162 -50
- package/dist/tasks/task-action-mutations.js +22 -2
- package/dist/tasks/task-action-read-ops.js +7 -1
- package/dist/tasks/task-action-runner-types.js +19 -1
- package/dist/voice/audio-pipeline.js +183 -96
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/docs/configuration.md +4 -9
- package/docs/official-docs.md +6 -9
- package/docs/runtime-switching.md +1 -1
- package/package.json +1 -1
- package/dist/voice/audio-pipeline.test.js +0 -619
- package/dist/voice/stt-deepgram.js +0 -154
- package/dist/voice/stt-deepgram.test.js +0 -275
- package/dist/voice/stt-factory.js +0 -42
- package/dist/voice/stt-factory.test.js +0 -45
- package/dist/voice/stt-openai.js +0 -156
- package/dist/voice/stt-openai.test.js +0 -281
- package/dist/voice/tts-cartesia.js +0 -169
- package/dist/voice/tts-cartesia.test.js +0 -228
- package/dist/voice/tts-deepgram.js +0 -84
- package/dist/voice/tts-deepgram.test.js +0 -220
- package/dist/voice/tts-factory.js +0 -52
- package/dist/voice/tts-factory.test.js +0 -53
- package/dist/voice/tts-openai.js +0 -70
- package/dist/voice/tts-openai.test.js +0 -138
- package/dist/voice/types.test.js +0 -84
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { runTaskSync } from './task-sync.js';
|
|
2
2
|
import { reloadTagMapInPlace } from './tag-map.js';
|
|
3
|
+
import { getTaskActionThreadMetadata } from './task-action-runner-types.js';
|
|
3
4
|
function resolveTaskId(action) {
|
|
4
5
|
return (action.taskId ?? '').trim();
|
|
5
6
|
}
|
|
@@ -18,11 +19,16 @@ export async function handleTaskShow(action, _ctx, taskCtx) {
|
|
|
18
19
|
];
|
|
19
20
|
if (task.owner)
|
|
20
21
|
lines.push(`Owner: ${task.owner}`);
|
|
22
|
+
const thread = getTaskActionThreadMetadata(task);
|
|
23
|
+
if (thread)
|
|
24
|
+
lines.push(`External ref: ${thread.externalRef}`);
|
|
25
|
+
if (thread?.threadUrl)
|
|
26
|
+
lines.push(`Thread: ${thread.threadUrl}`);
|
|
21
27
|
if (task.labels?.length)
|
|
22
28
|
lines.push(`Labels: ${task.labels.join(', ')}`);
|
|
23
29
|
if (task.description)
|
|
24
30
|
lines.push(`\n${task.description.slice(0, 500)}`);
|
|
25
|
-
return { ok: true, summary: lines.join('\n') };
|
|
31
|
+
return { ok: true, summary: lines.join('\n'), ...(thread ? { thread } : {}) };
|
|
26
32
|
}
|
|
27
33
|
export async function handleTaskList(action, _ctx, taskCtx) {
|
|
28
34
|
const tasks = taskCtx.store.list({
|
|
@@ -1 +1,19 @@
|
|
|
1
|
-
|
|
1
|
+
import { getThreadIdFromTask } from './thread-helpers.js';
|
|
2
|
+
export function buildDiscordThreadUrl(guildId, threadId) {
|
|
3
|
+
return `https://discord.com/channels/${guildId}/${threadId}`;
|
|
4
|
+
}
|
|
5
|
+
export function getTaskActionThreadMetadata(task) {
|
|
6
|
+
const externalRef = task.external_ref?.trim() ?? '';
|
|
7
|
+
if (!externalRef)
|
|
8
|
+
return undefined;
|
|
9
|
+
const threadId = getThreadIdFromTask(task) ?? undefined;
|
|
10
|
+
const threadGuildId = task.thread_origin_guild?.trim() || undefined;
|
|
11
|
+
return {
|
|
12
|
+
externalRef,
|
|
13
|
+
...(threadId ? { threadId } : {}),
|
|
14
|
+
...(threadGuildId ? { threadGuildId } : {}),
|
|
15
|
+
...(threadId && threadGuildId
|
|
16
|
+
? { threadUrl: buildDiscordThreadUrl(threadGuildId, threadId) }
|
|
17
|
+
: {}),
|
|
18
|
+
};
|
|
19
|
+
}
|
|
@@ -8,48 +8,55 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import { VoiceConnectionStatus } from '@discordjs/voice';
|
|
10
10
|
import { AudioReceiver } from './audio-receiver.js';
|
|
11
|
-
import { createSttProvider } from './stt-factory.js';
|
|
12
|
-
import { createTtsProvider } from './tts-factory.js';
|
|
13
|
-
import { VoiceResponder } from './voice-responder.js';
|
|
14
11
|
import { ConversationBuffer } from './conversation-buffer.js';
|
|
12
|
+
import { GeminiLiveProvider } from './providers/gemini-live-provider.js';
|
|
13
|
+
import { GeminiLiveResponder } from './providers/gemini-live-responder.js';
|
|
14
|
+
import { DEFAULT_GEMINI_LIVE_MODEL, normalizeGeminiLiveModel, supportsGeminiLiveAsyncFunctionCalling, } from './providers/gemini-live-types.js';
|
|
15
|
+
import { buildGeminiToolDeclarations, buildToolSchemas, OPENAI_TO_DISCO_NAME } from '../runtime/openai-tool-schemas.js';
|
|
16
|
+
import { executeToolCall } from '../runtime/openai-tool-exec.js';
|
|
15
17
|
// ---------------------------------------------------------------------------
|
|
16
18
|
// AudioPipelineManager
|
|
17
19
|
// ---------------------------------------------------------------------------
|
|
18
20
|
export class AudioPipelineManager {
|
|
19
21
|
log;
|
|
20
|
-
voiceConfig;
|
|
21
22
|
allowedUserIds;
|
|
22
23
|
createDecoder;
|
|
23
24
|
onTranscription;
|
|
24
|
-
createStt;
|
|
25
25
|
invokeAi;
|
|
26
26
|
runtime;
|
|
27
27
|
runtimeModel;
|
|
28
28
|
runtimeCwd;
|
|
29
29
|
runtimeTimeoutMs;
|
|
30
|
-
createTts;
|
|
31
30
|
transcriptMirror;
|
|
32
31
|
botDisplayName;
|
|
33
32
|
backfill;
|
|
33
|
+
buildGeminiSystemInstruction;
|
|
34
|
+
geminiApiKey;
|
|
35
|
+
enabledTools;
|
|
36
|
+
silentTools;
|
|
37
|
+
sessionRotationMs;
|
|
34
38
|
pipelines = new Map();
|
|
35
39
|
/** Re-entrancy guard: VoiceConnection.subscribe() can synchronously fire stateChange→Ready. */
|
|
36
40
|
starting = new Set();
|
|
37
41
|
constructor(opts) {
|
|
38
42
|
this.log = opts.log;
|
|
39
|
-
this.voiceConfig = opts.voiceConfig;
|
|
40
43
|
this.allowedUserIds = opts.allowedUserIds;
|
|
41
44
|
this.createDecoder = opts.createDecoder;
|
|
42
45
|
this.onTranscription = opts.onTranscription;
|
|
43
|
-
this.createStt = opts.createStt ?? createSttProvider;
|
|
44
46
|
this.invokeAi = opts.invokeAi;
|
|
45
47
|
this.runtime = opts.runtime;
|
|
46
48
|
this.runtimeModel = opts.runtimeModel;
|
|
47
49
|
this.runtimeCwd = opts.runtimeCwd;
|
|
48
50
|
this.runtimeTimeoutMs = opts.runtimeTimeoutMs;
|
|
49
|
-
this.createTts = opts.createTts ?? createTtsProvider;
|
|
50
51
|
this.transcriptMirror = opts.transcriptMirror;
|
|
51
52
|
this.botDisplayName = opts.botDisplayName ?? 'Bot';
|
|
52
53
|
this.backfill = opts.backfill;
|
|
54
|
+
this.buildGeminiSystemInstruction = opts.buildGeminiSystemInstruction;
|
|
55
|
+
this.geminiApiKey = opts.geminiApiKey;
|
|
56
|
+
this.enabledTools = opts.enabledTools ?? [];
|
|
57
|
+
this.silentTools = new Set(opts.silentTools ?? []);
|
|
58
|
+
this.sessionRotationMs = opts.sessionRotationMs;
|
|
59
|
+
this.log.info({ voiceProvider: 'gemini-live' }, 'audio pipeline manager initialized');
|
|
53
60
|
}
|
|
54
61
|
/**
|
|
55
62
|
* Attach to a VoiceConnection and auto-manage the audio pipeline
|
|
@@ -67,7 +74,7 @@ export class AudioPipelineManager {
|
|
|
67
74
|
}
|
|
68
75
|
});
|
|
69
76
|
}
|
|
70
|
-
/** Start the
|
|
77
|
+
/** Start the Gemini Live voice pipeline for a guild. */
|
|
71
78
|
async startPipeline(guildId, connection) {
|
|
72
79
|
// Re-entrancy guard: VoiceConnection.subscribe() (called when wiring the
|
|
73
80
|
// AudioPlayer) synchronously fires a stateChange→Ready event, which would
|
|
@@ -81,96 +88,170 @@ export class AudioPipelineManager {
|
|
|
81
88
|
await this.stopPipeline(guildId);
|
|
82
89
|
}
|
|
83
90
|
try {
|
|
84
|
-
const
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
if (this.
|
|
89
|
-
buffer = new ConversationBuffer();
|
|
90
|
-
if (this.backfill) {
|
|
91
|
-
try {
|
|
92
|
-
const turns = await this.backfill();
|
|
93
|
-
buffer.backfill(turns);
|
|
94
|
-
this.log.info({ guildId, turns: turns.length }, 'conversation buffer backfilled');
|
|
95
|
-
}
|
|
96
|
-
catch (err) {
|
|
97
|
-
this.log.warn({ guildId, err }, 'conversation backfill failed — proceeding with empty buffer');
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
}
|
|
101
|
-
// Create VoiceResponder for the full conversation loop if invokeAi is configured
|
|
102
|
-
let responder;
|
|
103
|
-
if (this.invokeAi) {
|
|
91
|
+
const apiKey = this.geminiApiKey;
|
|
92
|
+
if (!apiKey)
|
|
93
|
+
throw new Error('geminiApiKey is required for gemini-live voice provider');
|
|
94
|
+
const buffer = new ConversationBuffer();
|
|
95
|
+
if (this.backfill) {
|
|
104
96
|
try {
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
log: this.log,
|
|
109
|
-
tts,
|
|
110
|
-
connection,
|
|
111
|
-
invokeAi: this.invokeAi,
|
|
112
|
-
onBotResponse: mirror
|
|
113
|
-
? (text) => {
|
|
114
|
-
mirror.postBotResponse(botName, text).catch((err) => {
|
|
115
|
-
this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
|
|
116
|
-
});
|
|
117
|
-
}
|
|
118
|
-
: undefined,
|
|
119
|
-
buffer,
|
|
120
|
-
});
|
|
121
|
-
this.log.info({ guildId }, 'voice responder created');
|
|
97
|
+
const turns = await this.backfill();
|
|
98
|
+
buffer.backfill(turns);
|
|
99
|
+
this.log.info({ guildId, turns: turns.length }, 'gemini-live conversation buffer backfilled');
|
|
122
100
|
}
|
|
123
101
|
catch (err) {
|
|
124
|
-
this.log.warn({ guildId, err }, '
|
|
102
|
+
this.log.warn({ guildId, err }, 'gemini-live conversation backfill failed — proceeding with empty history');
|
|
125
103
|
}
|
|
126
104
|
}
|
|
127
|
-
|
|
128
|
-
const
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
105
|
+
const geminiLiveModel = normalizeGeminiLiveModel(this.runtimeModel) ?? DEFAULT_GEMINI_LIVE_MODEL;
|
|
106
|
+
const supportsAsyncFunctionCalling = supportsGeminiLiveAsyncFunctionCalling(geminiLiveModel);
|
|
107
|
+
const tools = buildGeminiToolDeclarations(this.enabledTools, { nonBlocking: supportsAsyncFunctionCalling });
|
|
108
|
+
const systemInstruction = await this.buildGeminiSystemInstruction?.();
|
|
109
|
+
const initialHistory = toGeminiLiveHistoryTurns(buffer.toTurns());
|
|
110
|
+
const provider = new GeminiLiveProvider({
|
|
111
|
+
apiKey,
|
|
112
|
+
log: this.log,
|
|
113
|
+
model: geminiLiveModel,
|
|
114
|
+
systemInstruction,
|
|
115
|
+
responseModalities: ['AUDIO'],
|
|
116
|
+
tools,
|
|
117
|
+
initialHistoryInClientContent: initialHistory.length > 0,
|
|
118
|
+
sessionRotationMs: this.sessionRotationMs,
|
|
119
|
+
});
|
|
120
|
+
await provider.connect();
|
|
121
|
+
if (initialHistory.length > 0) {
|
|
122
|
+
provider.sendInitialHistory(initialHistory);
|
|
123
|
+
this.log.info({ guildId, turns: initialHistory.length }, 'gemini-live conversation history seeded');
|
|
124
|
+
}
|
|
125
|
+
if (!supportsAsyncFunctionCalling && this.silentTools.size > 0) {
|
|
126
|
+
this.log.info({ guildId, model: geminiLiveModel, count: this.silentTools.size }, 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
|
|
127
|
+
}
|
|
128
|
+
const mirror = this.transcriptMirror;
|
|
129
|
+
const botName = this.botDisplayName;
|
|
130
|
+
let latestInputTranscript;
|
|
131
|
+
const responder = new GeminiLiveResponder({
|
|
132
|
+
log: this.log,
|
|
133
|
+
connection,
|
|
134
|
+
provider,
|
|
135
|
+
onBotResponse: mirror
|
|
136
|
+
? (text) => {
|
|
137
|
+
if (latestInputTranscript && text.trim()) {
|
|
138
|
+
buffer.push(latestInputTranscript, text);
|
|
139
|
+
latestInputTranscript = undefined;
|
|
146
140
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
141
|
+
mirror.postBotResponse(botName, text).catch((err) => {
|
|
142
|
+
this.log.warn({ guildId, err }, 'transcript-mirror: failed to post bot response');
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
: (text) => {
|
|
146
|
+
if (latestInputTranscript && text.trim()) {
|
|
147
|
+
buffer.push(latestInputTranscript, text);
|
|
148
|
+
latestInputTranscript = undefined;
|
|
151
149
|
}
|
|
150
|
+
},
|
|
151
|
+
onInputTranscript: mirror
|
|
152
|
+
? (text) => {
|
|
153
|
+
if (text.trim())
|
|
154
|
+
latestInputTranscript = text.trim();
|
|
155
|
+
mirror.postUserTranscription('User', text).catch((err) => {
|
|
156
|
+
this.log.warn({ guildId, err }, 'transcript-mirror: failed to post user transcription');
|
|
157
|
+
});
|
|
152
158
|
}
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
159
|
+
: (text) => {
|
|
160
|
+
if (text.trim())
|
|
161
|
+
latestInputTranscript = text.trim();
|
|
162
|
+
},
|
|
163
|
+
onSessionTerminated: () => {
|
|
164
|
+
this.log.error({ guildId }, 'gemini-live session terminally failed — no fallback');
|
|
165
|
+
},
|
|
166
|
+
onFallbackRecommended: (reason) => {
|
|
167
|
+
this.log.warn({ guildId, reason }, 'gemini-live: fallback recommended but the legacy pipeline has been removed');
|
|
168
|
+
},
|
|
169
|
+
onTokenWarning: (estimatedTokens, threshold) => {
|
|
170
|
+
this.log.warn({ guildId, estimatedTokens, threshold }, 'gemini-live: token usage approaching context window limit');
|
|
171
|
+
},
|
|
172
|
+
onToolCall: tools
|
|
173
|
+
? (calls) => {
|
|
174
|
+
this.log.info({ guildId, count: calls.length, names: calls.map((c) => c.name).join(',') }, 'gemini-live: tool call received — dispatching');
|
|
175
|
+
const allowedRoots = this.runtimeCwd ? [this.runtimeCwd] : [];
|
|
176
|
+
const allowedToolNames = new Set(buildToolSchemas(this.enabledTools).map((t) => t.function.name));
|
|
177
|
+
const logFn = (msg) => this.log.info({ guildId }, msg);
|
|
178
|
+
const execOpts = { enableHybridPipeline: false, allowedToolNames };
|
|
179
|
+
void (async () => {
|
|
180
|
+
const results = await Promise.all(calls.map(async (call) => {
|
|
181
|
+
const scheduling = supportsAsyncFunctionCalling
|
|
182
|
+
? (this.isSilentTool(call.name) ? 'SILENT' : 'INTERRUPT')
|
|
183
|
+
: undefined;
|
|
184
|
+
try {
|
|
185
|
+
const res = await executeToolCall(call.name, call.args, allowedRoots, logFn, execOpts);
|
|
186
|
+
return { id: call.id, name: call.name, output: res.result, scheduling };
|
|
187
|
+
}
|
|
188
|
+
catch (err) {
|
|
189
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
190
|
+
return { id: call.id, name: call.name, output: `Error: ${msg}`, scheduling };
|
|
191
|
+
}
|
|
192
|
+
}));
|
|
193
|
+
const silentCount = supportsAsyncFunctionCalling
|
|
194
|
+
? results.filter((r) => r.scheduling === 'SILENT').length
|
|
195
|
+
: 0;
|
|
196
|
+
if (silentCount > 0) {
|
|
197
|
+
this.log.info({ guildId, count: silentCount }, 'gemini-live: SILENT tool execution complete — results scheduled silently');
|
|
198
|
+
}
|
|
199
|
+
try {
|
|
200
|
+
provider.sendToolResponse(results);
|
|
201
|
+
}
|
|
202
|
+
catch (err) {
|
|
203
|
+
this.log.warn({ guildId, err }, 'gemini-live: sendToolResponse failed (provider likely disconnected)');
|
|
204
|
+
}
|
|
205
|
+
})();
|
|
206
|
+
}
|
|
207
|
+
: undefined,
|
|
208
|
+
});
|
|
209
|
+
responder.start();
|
|
210
|
+
// SttProvider shim: bridges AudioReceiver frames to GeminiLiveProvider.sendAudio
|
|
211
|
+
const sttShim = {
|
|
212
|
+
start: async () => { },
|
|
213
|
+
stop: async () => { },
|
|
214
|
+
onTranscription: () => { },
|
|
215
|
+
feedAudio: (frame) => {
|
|
216
|
+
try {
|
|
217
|
+
provider.sendAudio(frame.buffer);
|
|
218
|
+
}
|
|
219
|
+
catch (err) {
|
|
220
|
+
this.log.warn({ guildId, err }, 'gemini-live: sendAudio error (non-fatal)');
|
|
221
|
+
}
|
|
222
|
+
},
|
|
223
|
+
};
|
|
156
224
|
const receiver = new AudioReceiver({
|
|
157
225
|
connection,
|
|
158
226
|
allowedUserIds: this.allowedUserIds,
|
|
159
|
-
sttProvider,
|
|
227
|
+
sttProvider: sttShim,
|
|
160
228
|
log: this.log,
|
|
161
229
|
createDecoder: this.createDecoder,
|
|
162
|
-
onUserSpeaking: (
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
230
|
+
onUserSpeaking: () => { },
|
|
231
|
+
onUserSilence: () => {
|
|
232
|
+
try {
|
|
233
|
+
provider.sendAudioStreamEnd();
|
|
234
|
+
}
|
|
235
|
+
catch (err) {
|
|
236
|
+
this.log.warn({ guildId, err }, 'gemini-live: sendAudioStreamEnd error (non-fatal)');
|
|
237
|
+
}
|
|
166
238
|
},
|
|
167
239
|
});
|
|
168
240
|
receiver.start();
|
|
169
|
-
this.pipelines.set(guildId, {
|
|
170
|
-
|
|
241
|
+
this.pipelines.set(guildId, {
|
|
242
|
+
connection,
|
|
243
|
+
sttProvider: sttShim,
|
|
244
|
+
receiver,
|
|
245
|
+
buffer,
|
|
246
|
+
geminiProvider: provider,
|
|
247
|
+
geminiResponder: responder,
|
|
248
|
+
mode: 'gemini-live',
|
|
249
|
+
});
|
|
250
|
+
this.log.info({ guildId }, 'audio pipeline started (gemini-live)');
|
|
171
251
|
}
|
|
172
252
|
catch (err) {
|
|
173
253
|
this.log.error({ guildId, err }, 'failed to start audio pipeline');
|
|
254
|
+
this.log.error({ guildId }, 'gemini-live: connection failed — no fallback available');
|
|
174
255
|
}
|
|
175
256
|
finally {
|
|
176
257
|
this.starting.delete(guildId);
|
|
@@ -182,7 +263,10 @@ export class AudioPipelineManager {
|
|
|
182
263
|
if (!pipeline)
|
|
183
264
|
return;
|
|
184
265
|
this.pipelines.delete(guildId);
|
|
185
|
-
pipeline.
|
|
266
|
+
pipeline.geminiResponder?.destroy();
|
|
267
|
+
if (pipeline.geminiProvider) {
|
|
268
|
+
await pipeline.geminiProvider.disconnect();
|
|
269
|
+
}
|
|
186
270
|
pipeline.receiver.stop();
|
|
187
271
|
try {
|
|
188
272
|
await pipeline.sttProvider.stop();
|
|
@@ -205,20 +289,23 @@ export class AudioPipelineManager {
|
|
|
205
289
|
get activePipelineCount() {
|
|
206
290
|
return this.pipelines.size;
|
|
207
291
|
}
|
|
208
|
-
/**
|
|
209
|
-
get
|
|
210
|
-
return
|
|
292
|
+
/** Configured voice provider mode. */
|
|
293
|
+
get activeVoiceProvider() {
|
|
294
|
+
return 'gemini-live';
|
|
211
295
|
}
|
|
212
|
-
/**
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
296
|
+
/** Active mode for a specific guild. */
|
|
297
|
+
pipelineMode(guildId) {
|
|
298
|
+
return this.pipelines.get(guildId)?.mode;
|
|
299
|
+
}
|
|
300
|
+
isSilentTool(toolName) {
|
|
301
|
+
return this.silentTools.has(toolName) || this.silentTools.has(OPENAI_TO_DISCO_NAME[toolName] ?? toolName);
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
function toGeminiLiveHistoryTurns(turns) {
|
|
305
|
+
const history = [];
|
|
306
|
+
for (const turn of turns) {
|
|
307
|
+
history.push({ role: 'user', parts: [{ text: turn.user }] });
|
|
308
|
+
history.push({ role: 'model', parts: [{ text: turn.assistant }] });
|
|
223
309
|
}
|
|
310
|
+
return history;
|
|
224
311
|
}
|
|
@@ -22,6 +22,7 @@ export class AudioReceiver {
|
|
|
22
22
|
log;
|
|
23
23
|
createDecoder;
|
|
24
24
|
onUserSpeaking;
|
|
25
|
+
onUserSilence;
|
|
25
26
|
decoders = new Map();
|
|
26
27
|
running = false;
|
|
27
28
|
constructor(opts) {
|
|
@@ -31,6 +32,7 @@ export class AudioReceiver {
|
|
|
31
32
|
this.log = opts.log;
|
|
32
33
|
this.createDecoder = opts.createDecoder;
|
|
33
34
|
this.onUserSpeaking = opts.onUserSpeaking;
|
|
35
|
+
this.onUserSilence = opts.onUserSilence;
|
|
34
36
|
}
|
|
35
37
|
/** Begin listening for audio from allowlisted users. */
|
|
36
38
|
start() {
|
|
@@ -122,6 +124,12 @@ export class AudioReceiver {
|
|
|
122
124
|
decoder.destroy();
|
|
123
125
|
this.decoders.delete(userId);
|
|
124
126
|
this.log.info({ userId }, 'cleaned up user audio decoder');
|
|
127
|
+
try {
|
|
128
|
+
this.onUserSilence?.(userId);
|
|
129
|
+
}
|
|
130
|
+
catch (err) {
|
|
131
|
+
this.log.error({ err, userId }, 'onUserSilence callback error');
|
|
132
|
+
}
|
|
125
133
|
}
|
|
126
134
|
}
|
|
127
135
|
}
|
|
@@ -224,6 +224,22 @@ describe('AudioReceiver', () => {
|
|
|
224
224
|
expect(decoder.destroy).toHaveBeenCalled();
|
|
225
225
|
expect(log.info).toHaveBeenCalledWith({ userId: '111' }, 'cleaned up user audio decoder');
|
|
226
226
|
});
|
|
227
|
+
it('calls onUserSilence when a speaking burst ends', () => {
|
|
228
|
+
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
229
|
+
const onUserSilence = vi.fn();
|
|
230
|
+
const recv = new AudioReceiver({
|
|
231
|
+
connection,
|
|
232
|
+
allowedUserIds: new Set(['111']),
|
|
233
|
+
sttProvider: createMockStt(),
|
|
234
|
+
log: createLogger(),
|
|
235
|
+
createDecoder: createMockDecoder,
|
|
236
|
+
onUserSilence,
|
|
237
|
+
});
|
|
238
|
+
recv.start();
|
|
239
|
+
speakingEmitter.emit('start', '111');
|
|
240
|
+
streams.get('111').emit('end');
|
|
241
|
+
expect(onUserSilence).toHaveBeenCalledWith('111');
|
|
242
|
+
});
|
|
227
243
|
it('cleans up decoder on stream error', () => {
|
|
228
244
|
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
229
245
|
const decoder = createMockDecoder();
|
|
@@ -45,18 +45,28 @@ export class ConversationBuffer {
|
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
/**
|
|
48
|
-
*
|
|
49
|
-
*
|
|
48
|
+
* Return the stored turns from oldest to newest.
|
|
49
|
+
* Useful for replaying history into providers that support explicit seeding.
|
|
50
50
|
*/
|
|
51
|
-
|
|
51
|
+
toTurns() {
|
|
52
52
|
if (this.count === 0)
|
|
53
|
-
return
|
|
54
|
-
const
|
|
55
|
-
// Read from oldest to newest.
|
|
53
|
+
return [];
|
|
54
|
+
const turns = [];
|
|
56
55
|
const start = this.count < CAPACITY ? 0 : this.head;
|
|
57
56
|
for (let i = 0; i < this.count; i++) {
|
|
58
57
|
const idx = (start + i) % CAPACITY;
|
|
59
58
|
const turn = this.buffer[idx];
|
|
59
|
+
turns.push({ user: turn.user, assistant: turn.assistant });
|
|
60
|
+
}
|
|
61
|
+
return turns;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Format the stored turns as a conversation log string.
|
|
65
|
+
* Returns empty string when the buffer is empty.
|
|
66
|
+
*/
|
|
67
|
+
getHistory() {
|
|
68
|
+
const lines = [];
|
|
69
|
+
for (const turn of this.toTurns()) {
|
|
60
70
|
lines.push(`[User]: ${turn.user}`);
|
|
61
71
|
lines.push(`[Assistant]: ${turn.assistant}`);
|
|
62
72
|
}
|