discoclaw 1.2.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +6 -0
- package/dist/cli/dashboard.js +7 -1
- package/dist/config.js +7 -0
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +81 -14
- package/dist/dashboard/server.test.js +120 -4
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/message-coordinator.js +1 -28
- package/dist/discord/reaction-handler.js +81 -3
- package/dist/index.js +15 -1
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/voice/audio-pipeline.js +246 -6
- package/dist/voice/audio-pipeline.test.js +481 -0
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/types.test.js +6 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/package.json +1 -1
|
@@ -56,6 +56,62 @@ vi.mock('@discordjs/voice', () => ({
|
|
|
56
56
|
}),
|
|
57
57
|
createAudioResource: vi.fn(() => ({ type: 'mock-resource' })),
|
|
58
58
|
}));
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Mock Gemini Live providers
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
let mockGeminiProvider;
|
|
63
|
+
let mockGeminiResponder;
|
|
64
|
+
vi.mock('./providers/gemini-live-provider.js', () => ({
|
|
65
|
+
GeminiLiveProvider: vi.fn().mockImplementation(() => {
|
|
66
|
+
mockGeminiProvider = {
|
|
67
|
+
connect: vi.fn(async () => { }),
|
|
68
|
+
disconnect: vi.fn(async () => { }),
|
|
69
|
+
sendAudio: vi.fn(),
|
|
70
|
+
sendAudioStreamEnd: vi.fn(),
|
|
71
|
+
sendInitialHistory: vi.fn(),
|
|
72
|
+
sendToolResponse: vi.fn(),
|
|
73
|
+
onEvent: vi.fn(),
|
|
74
|
+
state: 'open',
|
|
75
|
+
};
|
|
76
|
+
return mockGeminiProvider;
|
|
77
|
+
}),
|
|
78
|
+
}));
|
|
79
|
+
vi.mock('./providers/gemini-live-responder.js', () => ({
|
|
80
|
+
GeminiLiveResponder: vi.fn().mockImplementation(() => {
|
|
81
|
+
mockGeminiResponder = {
|
|
82
|
+
start: vi.fn(),
|
|
83
|
+
stop: vi.fn(),
|
|
84
|
+
destroy: vi.fn(),
|
|
85
|
+
};
|
|
86
|
+
return mockGeminiResponder;
|
|
87
|
+
}),
|
|
88
|
+
}));
|
|
89
|
+
// ---------------------------------------------------------------------------
|
|
90
|
+
// Mock tool execution
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
const mockExecuteToolCall = vi.fn(async () => ({ result: 'ok', ok: true }));
|
|
93
|
+
vi.mock('../runtime/openai-tool-exec.js', () => ({
|
|
94
|
+
executeToolCall: (...args) => mockExecuteToolCall(...args),
|
|
95
|
+
}));
|
|
96
|
+
vi.mock('../runtime/openai-tool-schemas.js', () => ({
|
|
97
|
+
OPENAI_TO_DISCO_NAME: {
|
|
98
|
+
Read: 'Read',
|
|
99
|
+
Bash: 'Bash',
|
|
100
|
+
MemoryQuery: 'MemoryQuery',
|
|
101
|
+
read_file: 'Read',
|
|
102
|
+
bash: 'Bash',
|
|
103
|
+
},
|
|
104
|
+
buildGeminiToolDeclarations: vi.fn((enabledTools, opts) => ({
|
|
105
|
+
functionDeclarations: enabledTools.map((name) => ({
|
|
106
|
+
name,
|
|
107
|
+
...(opts?.nonBlocking ? { behavior: 'NON_BLOCKING' } : {}),
|
|
108
|
+
})),
|
|
109
|
+
})),
|
|
110
|
+
buildToolSchemas: vi.fn((enabledTools) => enabledTools.map((name) => ({
|
|
111
|
+
type: 'function',
|
|
112
|
+
function: { name, description: `${name} tool`, parameters: {} },
|
|
113
|
+
}))),
|
|
114
|
+
}));
|
|
59
115
|
// We don't want real stt-factory or audio-receiver internals — the pipeline
|
|
60
116
|
// injects a createStt override and AudioReceiver is tested separately.
|
|
61
117
|
// However we do import AudioReceiver for real so the wiring is exercised.
|
|
@@ -148,6 +204,8 @@ function createPipelineOpts(overrides = {}) {
|
|
|
148
204
|
// ---------------------------------------------------------------------------
|
|
149
205
|
beforeEach(() => {
|
|
150
206
|
vi.clearAllMocks();
|
|
207
|
+
mockExecuteToolCall.mockReset();
|
|
208
|
+
mockExecuteToolCall.mockResolvedValue({ result: 'ok', ok: true });
|
|
151
209
|
lastMockPlayer = null;
|
|
152
210
|
});
|
|
153
211
|
describe('AudioPipelineManager', () => {
|
|
@@ -616,4 +674,427 @@ describe('AudioPipelineManager', () => {
|
|
|
616
674
|
expect(() => stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 })).not.toThrow();
|
|
617
675
|
});
|
|
618
676
|
});
|
|
677
|
+
describe('gemini-live mode', () => {
|
|
678
|
+
function createGeminiOpts(overrides = {}) {
|
|
679
|
+
return {
|
|
680
|
+
log: createLogger(),
|
|
681
|
+
voiceConfig: baseVoiceConfig(),
|
|
682
|
+
allowedUserIds: new Set(['111']),
|
|
683
|
+
createDecoder: () => createMockDecoder(),
|
|
684
|
+
voiceProvider: 'gemini-live',
|
|
685
|
+
geminiApiKey: 'test-gemini-key',
|
|
686
|
+
...overrides,
|
|
687
|
+
};
|
|
688
|
+
}
|
|
689
|
+
it('creates GeminiLiveProvider and GeminiLiveResponder, skipping STT/TTS', async () => {
|
|
690
|
+
const opts = createGeminiOpts();
|
|
691
|
+
const mgr = new AudioPipelineManager(opts);
|
|
692
|
+
const { connection } = createMockConnection();
|
|
693
|
+
await mgr.startPipeline('g1', connection);
|
|
694
|
+
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
695
|
+
expect(mockGeminiProvider.connect).toHaveBeenCalled();
|
|
696
|
+
expect(mockGeminiResponder.start).toHaveBeenCalled();
|
|
697
|
+
});
|
|
698
|
+
it('passes built systemInstruction into GeminiLiveProvider setup', async () => {
|
|
699
|
+
const buildGeminiSystemInstruction = vi.fn(async () => 'voice system instruction');
|
|
700
|
+
const opts = createGeminiOpts({ buildGeminiSystemInstruction });
|
|
701
|
+
const mgr = new AudioPipelineManager(opts);
|
|
702
|
+
const { connection } = createMockConnection();
|
|
703
|
+
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
704
|
+
await mgr.startPipeline('g1', connection);
|
|
705
|
+
expect(buildGeminiSystemInstruction).toHaveBeenCalled();
|
|
706
|
+
const providerCalls = ProviderMock.mock.calls;
|
|
707
|
+
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
708
|
+
systemInstruction: 'voice system instruction',
|
|
709
|
+
}));
|
|
710
|
+
});
|
|
711
|
+
it('backfills and seeds initial history into Gemini Live before audio starts', async () => {
|
|
712
|
+
const backfill = vi.fn(async () => [
|
|
713
|
+
{ user: 'first user', assistant: 'first reply' },
|
|
714
|
+
{ user: 'second user', assistant: 'second reply' },
|
|
715
|
+
]);
|
|
716
|
+
const opts = createGeminiOpts({ backfill });
|
|
717
|
+
const mgr = new AudioPipelineManager(opts);
|
|
718
|
+
const { connection } = createMockConnection();
|
|
719
|
+
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
720
|
+
await mgr.startPipeline('g1', connection);
|
|
721
|
+
const providerCalls = ProviderMock.mock.calls;
|
|
722
|
+
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
723
|
+
initialHistoryInClientContent: true,
|
|
724
|
+
}));
|
|
725
|
+
expect(mockGeminiProvider.sendInitialHistory).toHaveBeenCalledWith([
|
|
726
|
+
{ role: 'user', parts: [{ text: 'first user' }] },
|
|
727
|
+
{ role: 'model', parts: [{ text: 'first reply' }] },
|
|
728
|
+
{ role: 'user', parts: [{ text: 'second user' }] },
|
|
729
|
+
{ role: 'model', parts: [{ text: 'second reply' }] },
|
|
730
|
+
]);
|
|
731
|
+
});
|
|
732
|
+
it('uses synchronous tool declarations for the default 3.1 live model', async () => {
|
|
733
|
+
const opts = createGeminiOpts({ enabledTools: ['Read', 'Bash'] });
|
|
734
|
+
const mgr = new AudioPipelineManager(opts);
|
|
735
|
+
const { connection } = createMockConnection();
|
|
736
|
+
const toolSchemas = await import('../runtime/openai-tool-schemas.js');
|
|
737
|
+
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
738
|
+
await mgr.startPipeline('g1', connection);
|
|
739
|
+
expect(toolSchemas.buildGeminiToolDeclarations).toHaveBeenCalledWith(['Read', 'Bash'], { nonBlocking: false });
|
|
740
|
+
const providerCalls = ProviderMock.mock.calls;
|
|
741
|
+
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
742
|
+
model: 'gemini-3.1-flash-live-preview',
|
|
743
|
+
}));
|
|
744
|
+
});
|
|
745
|
+
it('preserves NON_BLOCKING tool declarations for explicit 2.5 live models', async () => {
|
|
746
|
+
const opts = createGeminiOpts({
|
|
747
|
+
enabledTools: ['Read'],
|
|
748
|
+
runtimeModel: 'gemini-2.5-flash-live-preview',
|
|
749
|
+
});
|
|
750
|
+
const mgr = new AudioPipelineManager(opts);
|
|
751
|
+
const { connection } = createMockConnection();
|
|
752
|
+
const toolSchemas = await import('../runtime/openai-tool-schemas.js');
|
|
753
|
+
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
754
|
+
await mgr.startPipeline('g1', connection);
|
|
755
|
+
expect(toolSchemas.buildGeminiToolDeclarations).toHaveBeenCalledWith(['Read'], { nonBlocking: true });
|
|
756
|
+
const providerCalls = ProviderMock.mock.calls;
|
|
757
|
+
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
758
|
+
model: 'gemini-2.5-flash-live-preview',
|
|
759
|
+
}));
|
|
760
|
+
});
|
|
761
|
+
it('calls provider.disconnect() and responder.destroy() on stopPipeline', async () => {
|
|
762
|
+
const opts = createGeminiOpts();
|
|
763
|
+
const mgr = new AudioPipelineManager(opts);
|
|
764
|
+
const { connection } = createMockConnection();
|
|
765
|
+
await mgr.startPipeline('g1', connection);
|
|
766
|
+
await mgr.stopPipeline('g1');
|
|
767
|
+
expect(mockGeminiResponder.destroy).toHaveBeenCalled();
|
|
768
|
+
expect(mockGeminiProvider.disconnect).toHaveBeenCalled();
|
|
769
|
+
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
770
|
+
});
|
|
771
|
+
it('shim feedAudio bridges to provider.sendAudio', async () => {
|
|
772
|
+
const opts = createGeminiOpts();
|
|
773
|
+
const mgr = new AudioPipelineManager(opts);
|
|
774
|
+
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
775
|
+
await mgr.startPipeline('g1', connection);
|
|
776
|
+
// Simulate a user speaking — trigger the receiver to subscribe
|
|
777
|
+
speakingEmitter.emit('start', '111');
|
|
778
|
+
// Feed a packet through the stream to exercise the shim
|
|
779
|
+
const stream = streams.get('111');
|
|
780
|
+
if (stream) {
|
|
781
|
+
stream.emit('data', Buffer.alloc(80));
|
|
782
|
+
}
|
|
783
|
+
// Allow async processing
|
|
784
|
+
await new Promise((r) => setTimeout(r, 20));
|
|
785
|
+
// The shim feedAudio calls provider.sendAudio
|
|
786
|
+
expect(mockGeminiProvider.sendAudio).toHaveBeenCalled();
|
|
787
|
+
});
|
|
788
|
+
it('shim swallows sendAudio errors without crashing', async () => {
|
|
789
|
+
const log = createLogger();
|
|
790
|
+
const opts = createGeminiOpts({ log });
|
|
791
|
+
const mgr = new AudioPipelineManager(opts);
|
|
792
|
+
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
793
|
+
await mgr.startPipeline('g1', connection);
|
|
794
|
+
// Make sendAudio throw
|
|
795
|
+
mockGeminiProvider.sendAudio.mockImplementation(() => {
|
|
796
|
+
throw new Error('WebSocket not open');
|
|
797
|
+
});
|
|
798
|
+
// Simulate a user speaking
|
|
799
|
+
speakingEmitter.emit('start', '111');
|
|
800
|
+
const stream = streams.get('111');
|
|
801
|
+
if (stream) {
|
|
802
|
+
stream.emit('data', Buffer.alloc(80));
|
|
803
|
+
}
|
|
804
|
+
await new Promise((r) => setTimeout(r, 20));
|
|
805
|
+
// Should have logged a warning but not thrown
|
|
806
|
+
expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: sendAudio error (non-fatal)');
|
|
807
|
+
});
|
|
808
|
+
it('signals audioStreamEnd when a user speaking burst ends', async () => {
|
|
809
|
+
const opts = createGeminiOpts();
|
|
810
|
+
const mgr = new AudioPipelineManager(opts);
|
|
811
|
+
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
812
|
+
await mgr.startPipeline('g1', connection);
|
|
813
|
+
speakingEmitter.emit('start', '111');
|
|
814
|
+
streams.get('111').emit('end');
|
|
815
|
+
expect(mockGeminiProvider.sendAudioStreamEnd).toHaveBeenCalled();
|
|
816
|
+
});
|
|
817
|
+
it('throws when geminiApiKey is missing', async () => {
|
|
818
|
+
const log = createLogger();
|
|
819
|
+
const opts = createGeminiOpts({ geminiApiKey: undefined, log });
|
|
820
|
+
const mgr = new AudioPipelineManager(opts);
|
|
821
|
+
const { connection } = createMockConnection();
|
|
822
|
+
await mgr.startPipeline('g1', connection);
|
|
823
|
+
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
824
|
+
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'failed to start audio pipeline');
|
|
825
|
+
});
|
|
826
|
+
it('default voiceProvider unset uses standard pipeline path', async () => {
|
|
827
|
+
// No voiceProvider set — should use the normal STT path
|
|
828
|
+
const mockStt = createMockStt();
|
|
829
|
+
const opts = createPipelineOpts({ createStt: () => mockStt });
|
|
830
|
+
const mgr = new AudioPipelineManager(opts);
|
|
831
|
+
const { connection } = createMockConnection();
|
|
832
|
+
await mgr.startPipeline('g1', connection);
|
|
833
|
+
expect(mockStt.start).toHaveBeenCalled();
|
|
834
|
+
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
835
|
+
});
|
|
836
|
+
it('wires onBotResponse to transcriptMirror.postBotResponse', async () => {
|
|
837
|
+
const mirror = {
|
|
838
|
+
postUserTranscription: vi.fn(async () => { }),
|
|
839
|
+
postBotResponse: vi.fn(async () => { }),
|
|
840
|
+
};
|
|
841
|
+
const opts = createGeminiOpts({
|
|
842
|
+
transcriptMirror: mirror,
|
|
843
|
+
botDisplayName: 'GeminiBot',
|
|
844
|
+
});
|
|
845
|
+
const mgr = new AudioPipelineManager(opts);
|
|
846
|
+
const { connection } = createMockConnection();
|
|
847
|
+
// Access the GeminiLiveResponder constructor mock to check the onBotResponse option
|
|
848
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
849
|
+
await mgr.startPipeline('g1', connection);
|
|
850
|
+
// Extract the onBotResponse callback passed to GeminiLiveResponder
|
|
851
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
852
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
853
|
+
const responderOpts = lastCall[0];
|
|
854
|
+
expect(responderOpts.onBotResponse).toBeDefined();
|
|
855
|
+
responderOpts.onBotResponse('Hello from Gemini');
|
|
856
|
+
await vi.waitFor(() => {
|
|
857
|
+
expect(mirror.postBotResponse).toHaveBeenCalledWith('GeminiBot', 'Hello from Gemini');
|
|
858
|
+
});
|
|
859
|
+
});
|
|
860
|
+
it('records completed Gemini Live turns in the local conversation buffer', async () => {
|
|
861
|
+
const backfill = vi.fn(async () => []);
|
|
862
|
+
const opts = createGeminiOpts({ backfill });
|
|
863
|
+
const mgr = new AudioPipelineManager(opts);
|
|
864
|
+
const { connection } = createMockConnection();
|
|
865
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
866
|
+
await mgr.startPipeline('g1', connection);
|
|
867
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
868
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
869
|
+
const responderOpts = lastCall[0];
|
|
870
|
+
responderOpts.onInputTranscript?.('hello there');
|
|
871
|
+
responderOpts.onBotResponse?.('general kenobi');
|
|
872
|
+
expect(backfill).toHaveBeenCalled();
|
|
873
|
+
const pipeline = mgr.pipelines.get('g1');
|
|
874
|
+
expect(pipeline?.buffer?.getHistory()).toContain('[User]: hello there');
|
|
875
|
+
expect(pipeline?.buffer?.getHistory()).toContain('[Assistant]: general kenobi');
|
|
876
|
+
});
|
|
877
|
+
it('wires onInputTranscript to transcriptMirror.postUserTranscription', async () => {
|
|
878
|
+
const mirror = {
|
|
879
|
+
postUserTranscription: vi.fn(async () => { }),
|
|
880
|
+
postBotResponse: vi.fn(async () => { }),
|
|
881
|
+
};
|
|
882
|
+
const opts = createGeminiOpts({
|
|
883
|
+
transcriptMirror: mirror,
|
|
884
|
+
});
|
|
885
|
+
const mgr = new AudioPipelineManager(opts);
|
|
886
|
+
const { connection } = createMockConnection();
|
|
887
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
888
|
+
await mgr.startPipeline('g1', connection);
|
|
889
|
+
// Extract the onInputTranscript callback passed to GeminiLiveResponder
|
|
890
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
891
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
892
|
+
const responderOpts = lastCall[0];
|
|
893
|
+
expect(responderOpts.onInputTranscript).toBeDefined();
|
|
894
|
+
responderOpts.onInputTranscript('hello from user');
|
|
895
|
+
await vi.waitFor(() => {
|
|
896
|
+
expect(mirror.postUserTranscription).toHaveBeenCalledWith('User', 'hello from user');
|
|
897
|
+
});
|
|
898
|
+
});
|
|
899
|
+
// -----------------------------------------------------------------------
|
|
900
|
+
// Tool call dispatch tests
|
|
901
|
+
// -----------------------------------------------------------------------
|
|
902
|
+
/** Helper: extract the onToolCall callback from the GeminiLiveResponder constructor mock. */
|
|
903
|
+
async function extractOnToolCall(overrides = {}) {
|
|
904
|
+
const opts = createGeminiOpts({
|
|
905
|
+
enabledTools: ['Read', 'Bash'],
|
|
906
|
+
runtimeCwd: '/fake/cwd',
|
|
907
|
+
...overrides,
|
|
908
|
+
});
|
|
909
|
+
const mgr = new AudioPipelineManager(opts);
|
|
910
|
+
const { connection } = createMockConnection();
|
|
911
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
912
|
+
await mgr.startPipeline('g1', connection);
|
|
913
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
914
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
915
|
+
const responderOpts = lastCall[0];
|
|
916
|
+
return responderOpts.onToolCall;
|
|
917
|
+
}
|
|
918
|
+
it('dispatches tool call to executor and sends response back via provider', async () => {
|
|
919
|
+
mockExecuteToolCall.mockResolvedValueOnce({ result: 'file contents here', ok: true });
|
|
920
|
+
const onToolCall = await extractOnToolCall();
|
|
921
|
+
onToolCall([{ id: 'tc-1', name: 'Read', args: { file_path: '/foo.txt' } }]);
|
|
922
|
+
await vi.waitFor(() => {
|
|
923
|
+
expect(mockExecuteToolCall).toHaveBeenCalledWith('Read', { file_path: '/foo.txt' }, ['/fake/cwd'], expect.any(Function), expect.objectContaining({ enableHybridPipeline: false }));
|
|
924
|
+
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
925
|
+
{ id: 'tc-1', name: 'Read', output: 'file contents here' },
|
|
926
|
+
]);
|
|
927
|
+
});
|
|
928
|
+
});
|
|
929
|
+
it('returns error result string to Gemini when executor throws', async () => {
|
|
930
|
+
mockExecuteToolCall.mockRejectedValueOnce(new Error('permission denied'));
|
|
931
|
+
const onToolCall = await extractOnToolCall();
|
|
932
|
+
onToolCall([{ id: 'tc-err', name: 'Read', args: {} }]);
|
|
933
|
+
await vi.waitFor(() => {
|
|
934
|
+
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
935
|
+
{ id: 'tc-err', name: 'Read', output: 'Error: permission denied' },
|
|
936
|
+
]);
|
|
937
|
+
});
|
|
938
|
+
});
|
|
939
|
+
it('returns error for unknown tool name (executor rejects non-allowlisted tool)', async () => {
|
|
940
|
+
mockExecuteToolCall.mockResolvedValueOnce({ result: 'Tool not allowed: UnknownTool', ok: false });
|
|
941
|
+
const onToolCall = await extractOnToolCall();
|
|
942
|
+
onToolCall([{ id: 'tc-unk', name: 'UnknownTool', args: {} }]);
|
|
943
|
+
await vi.waitFor(() => {
|
|
944
|
+
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
945
|
+
{ id: 'tc-unk', name: 'UnknownTool', output: 'Tool not allowed: UnknownTool' },
|
|
946
|
+
]);
|
|
947
|
+
});
|
|
948
|
+
});
|
|
949
|
+
it('dispatches multiple function calls and sends all responses in one sendToolResponse', async () => {
|
|
950
|
+
mockExecuteToolCall
|
|
951
|
+
.mockResolvedValueOnce({ result: 'result-A', ok: true })
|
|
952
|
+
.mockResolvedValueOnce({ result: 'result-B', ok: true });
|
|
953
|
+
const onToolCall = await extractOnToolCall();
|
|
954
|
+
onToolCall([
|
|
955
|
+
{ id: 'tc-a', name: 'Read', args: { file_path: '/a.txt' } },
|
|
956
|
+
{ id: 'tc-b', name: 'Bash', args: { command: 'ls' } },
|
|
957
|
+
]);
|
|
958
|
+
await vi.waitFor(() => {
|
|
959
|
+
expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
|
|
960
|
+
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
961
|
+
{ id: 'tc-a', name: 'Read', output: 'result-A' },
|
|
962
|
+
{ id: 'tc-b', name: 'Bash', output: 'result-B' },
|
|
963
|
+
]);
|
|
964
|
+
});
|
|
965
|
+
});
|
|
966
|
+
it('catches and logs sendToolResponse throw without crashing', async () => {
|
|
967
|
+
mockExecuteToolCall.mockResolvedValueOnce({ result: 'ok', ok: true });
|
|
968
|
+
const log = createLogger();
|
|
969
|
+
const onToolCall = await extractOnToolCall({ log });
|
|
970
|
+
mockGeminiProvider.sendToolResponse.mockImplementation(() => {
|
|
971
|
+
throw new Error('WebSocket closed');
|
|
972
|
+
});
|
|
973
|
+
onToolCall([{ id: 'tc-disc', name: 'Read', args: {} }]);
|
|
974
|
+
await vi.waitFor(() => {
|
|
975
|
+
expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: sendToolResponse failed (provider likely disconnected)');
|
|
976
|
+
});
|
|
977
|
+
});
|
|
978
|
+
// -----------------------------------------------------------------------
|
|
979
|
+
// Scheduled tool responses
|
|
980
|
+
// -----------------------------------------------------------------------
|
|
981
|
+
it('ignores silent tool scheduling on the default 3.1 live model', async () => {
|
|
982
|
+
mockExecuteToolCall.mockResolvedValue({ result: 'memory contents', ok: true });
|
|
983
|
+
const log = createLogger();
|
|
984
|
+
const opts = createGeminiOpts({
|
|
985
|
+
log,
|
|
986
|
+
enabledTools: ['Read', 'MemoryQuery'],
|
|
987
|
+
silentTools: ['MemoryQuery'],
|
|
988
|
+
runtimeCwd: '/fake/cwd',
|
|
989
|
+
});
|
|
990
|
+
const mgr = new AudioPipelineManager(opts);
|
|
991
|
+
const { connection } = createMockConnection();
|
|
992
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
993
|
+
await mgr.startPipeline('g1', connection);
|
|
994
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
995
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
996
|
+
const responderOpts = lastCall[0];
|
|
997
|
+
// Dispatch a SILENT tool call
|
|
998
|
+
responderOpts.onToolCall([{ id: 'tc-silent', name: 'MemoryQuery', args: { key: 'test' } }]);
|
|
999
|
+
await vi.waitFor(() => {
|
|
1000
|
+
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
1001
|
+
{ id: 'tc-silent', name: 'MemoryQuery', output: 'memory contents' },
|
|
1002
|
+
]);
|
|
1003
|
+
});
|
|
1004
|
+
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({
|
|
1005
|
+
guildId: 'g1',
|
|
1006
|
+
model: 'gemini-3.1-flash-live-preview',
|
|
1007
|
+
count: 1,
|
|
1008
|
+
}), 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
|
|
1009
|
+
});
|
|
1010
|
+
it('sends SILENT-scheduled tool responses for explicit 2.5 live models', async () => {
|
|
1011
|
+
mockExecuteToolCall
|
|
1012
|
+
.mockResolvedValueOnce({ result: 'memory data', ok: true });
|
|
1013
|
+
const log = createLogger();
|
|
1014
|
+
const opts = createGeminiOpts({
|
|
1015
|
+
log,
|
|
1016
|
+
enabledTools: ['MemoryQuery'],
|
|
1017
|
+
silentTools: ['MemoryQuery'],
|
|
1018
|
+
runtimeCwd: '/fake/cwd',
|
|
1019
|
+
runtimeModel: 'gemini-2.5-flash-live-preview',
|
|
1020
|
+
});
|
|
1021
|
+
const mgr = new AudioPipelineManager(opts);
|
|
1022
|
+
const { connection } = createMockConnection();
|
|
1023
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
1024
|
+
await mgr.startPipeline('g1', connection);
|
|
1025
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
1026
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
1027
|
+
const responderOpts = lastCall[0];
|
|
1028
|
+
responderOpts.onToolCall([
|
|
1029
|
+
{ id: 'tc-mem', name: 'MemoryQuery', args: { key: 'test' } },
|
|
1030
|
+
]);
|
|
1031
|
+
await vi.waitFor(() => {
|
|
1032
|
+
expect(mockExecuteToolCall).toHaveBeenCalledTimes(1);
|
|
1033
|
+
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1', count: 1 }), 'gemini-live: SILENT tool execution complete — results scheduled silently');
|
|
1034
|
+
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
1035
|
+
{ id: 'tc-mem', name: 'MemoryQuery', output: 'memory data', scheduling: 'SILENT' },
|
|
1036
|
+
]);
|
|
1037
|
+
});
|
|
1038
|
+
});
|
|
1039
|
+
// -----------------------------------------------------------------------
|
|
1040
|
+
// Fallback to standard pipeline
|
|
1041
|
+
// -----------------------------------------------------------------------
|
|
1042
|
+
it('does not fall back when initial gemini-live connection fails (fallback disabled)', async () => {
|
|
1043
|
+
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
1044
|
+
// Make the next provider's connect() reject
|
|
1045
|
+
ProviderMock.mockImplementationOnce(() => {
|
|
1046
|
+
mockGeminiProvider = {
|
|
1047
|
+
connect: vi.fn(async () => { throw new Error('connection refused'); }),
|
|
1048
|
+
disconnect: vi.fn(async () => { }),
|
|
1049
|
+
sendAudio: vi.fn(),
|
|
1050
|
+
sendAudioStreamEnd: vi.fn(),
|
|
1051
|
+
sendInitialHistory: vi.fn(),
|
|
1052
|
+
sendToolResponse: vi.fn(),
|
|
1053
|
+
onEvent: vi.fn(),
|
|
1054
|
+
state: 'idle',
|
|
1055
|
+
};
|
|
1056
|
+
return mockGeminiProvider;
|
|
1057
|
+
});
|
|
1058
|
+
const log = createLogger();
|
|
1059
|
+
const opts = createGeminiOpts({ log });
|
|
1060
|
+
const mgr = new AudioPipelineManager(opts);
|
|
1061
|
+
const { connection } = createMockConnection();
|
|
1062
|
+
await mgr.startPipeline('g1', connection);
|
|
1063
|
+
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
1064
|
+
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: connection failed — no fallback (fallback disabled)');
|
|
1065
|
+
});
|
|
1066
|
+
it('does not fall back when onSessionTerminated is triggered (fallback disabled)', async () => {
|
|
1067
|
+
const log = createLogger();
|
|
1068
|
+
const opts = createGeminiOpts({ log });
|
|
1069
|
+
const mgr = new AudioPipelineManager(opts);
|
|
1070
|
+
const { connection } = createMockConnection();
|
|
1071
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
1072
|
+
await mgr.startPipeline('g1', connection);
|
|
1073
|
+
expect(mgr.pipelineMode('g1')).toBe('gemini-live');
|
|
1074
|
+
// Extract onSessionTerminated callback
|
|
1075
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
1076
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
1077
|
+
const responderOpts = lastCall[0];
|
|
1078
|
+
expect(responderOpts.onSessionTerminated).toBeDefined();
|
|
1079
|
+
responderOpts.onSessionTerminated();
|
|
1080
|
+
// Pipeline should NOT switch to standard mode — it stays as gemini-live (or gets stopped)
|
|
1081
|
+
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live session terminally failed — no fallback (fallback disabled)');
|
|
1082
|
+
});
|
|
1083
|
+
it('does not fall back when onFallbackRecommended is triggered (fallback disabled)', async () => {
|
|
1084
|
+
const log = createLogger();
|
|
1085
|
+
const opts = createGeminiOpts({ log });
|
|
1086
|
+
const mgr = new AudioPipelineManager(opts);
|
|
1087
|
+
const { connection } = createMockConnection();
|
|
1088
|
+
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
1089
|
+
await mgr.startPipeline('g1', connection);
|
|
1090
|
+
// Extract onFallbackRecommended callback
|
|
1091
|
+
const constructorCalls = ResponderMock.mock.calls;
|
|
1092
|
+
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
1093
|
+
const responderOpts = lastCall[0];
|
|
1094
|
+
expect(responderOpts.onFallbackRecommended).toBeDefined();
|
|
1095
|
+
responderOpts.onFallbackRecommended('exhausted reconnect retries');
|
|
1096
|
+
// Pipeline should NOT switch to standard mode
|
|
1097
|
+
expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1', reason: 'exhausted reconnect retries' }), 'gemini-live: fallback recommended but fallback is disabled');
|
|
1098
|
+
});
|
|
1099
|
+
});
|
|
619
1100
|
});
|
|
@@ -22,6 +22,7 @@ export class AudioReceiver {
|
|
|
22
22
|
log;
|
|
23
23
|
createDecoder;
|
|
24
24
|
onUserSpeaking;
|
|
25
|
+
onUserSilence;
|
|
25
26
|
decoders = new Map();
|
|
26
27
|
running = false;
|
|
27
28
|
constructor(opts) {
|
|
@@ -31,6 +32,7 @@ export class AudioReceiver {
|
|
|
31
32
|
this.log = opts.log;
|
|
32
33
|
this.createDecoder = opts.createDecoder;
|
|
33
34
|
this.onUserSpeaking = opts.onUserSpeaking;
|
|
35
|
+
this.onUserSilence = opts.onUserSilence;
|
|
34
36
|
}
|
|
35
37
|
/** Begin listening for audio from allowlisted users. */
|
|
36
38
|
start() {
|
|
@@ -122,6 +124,12 @@ export class AudioReceiver {
|
|
|
122
124
|
decoder.destroy();
|
|
123
125
|
this.decoders.delete(userId);
|
|
124
126
|
this.log.info({ userId }, 'cleaned up user audio decoder');
|
|
127
|
+
try {
|
|
128
|
+
this.onUserSilence?.(userId);
|
|
129
|
+
}
|
|
130
|
+
catch (err) {
|
|
131
|
+
this.log.error({ err, userId }, 'onUserSilence callback error');
|
|
132
|
+
}
|
|
125
133
|
}
|
|
126
134
|
}
|
|
127
135
|
}
|
|
@@ -224,6 +224,22 @@ describe('AudioReceiver', () => {
|
|
|
224
224
|
expect(decoder.destroy).toHaveBeenCalled();
|
|
225
225
|
expect(log.info).toHaveBeenCalledWith({ userId: '111' }, 'cleaned up user audio decoder');
|
|
226
226
|
});
|
|
227
|
+
it('calls onUserSilence when a speaking burst ends', () => {
|
|
228
|
+
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
229
|
+
const onUserSilence = vi.fn();
|
|
230
|
+
const recv = new AudioReceiver({
|
|
231
|
+
connection,
|
|
232
|
+
allowedUserIds: new Set(['111']),
|
|
233
|
+
sttProvider: createMockStt(),
|
|
234
|
+
log: createLogger(),
|
|
235
|
+
createDecoder: createMockDecoder,
|
|
236
|
+
onUserSilence,
|
|
237
|
+
});
|
|
238
|
+
recv.start();
|
|
239
|
+
speakingEmitter.emit('start', '111');
|
|
240
|
+
streams.get('111').emit('end');
|
|
241
|
+
expect(onUserSilence).toHaveBeenCalledWith('111');
|
|
242
|
+
});
|
|
227
243
|
it('cleans up decoder on stream error', () => {
|
|
228
244
|
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
229
245
|
const decoder = createMockDecoder();
|
|
@@ -45,18 +45,28 @@ export class ConversationBuffer {
|
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
/**
|
|
48
|
-
*
|
|
49
|
-
*
|
|
48
|
+
* Return the stored turns from oldest to newest.
|
|
49
|
+
* Useful for replaying history into providers that support explicit seeding.
|
|
50
50
|
*/
|
|
51
|
-
|
|
51
|
+
toTurns() {
|
|
52
52
|
if (this.count === 0)
|
|
53
|
-
return
|
|
54
|
-
const
|
|
55
|
-
// Read from oldest to newest.
|
|
53
|
+
return [];
|
|
54
|
+
const turns = [];
|
|
56
55
|
const start = this.count < CAPACITY ? 0 : this.head;
|
|
57
56
|
for (let i = 0; i < this.count; i++) {
|
|
58
57
|
const idx = (start + i) % CAPACITY;
|
|
59
58
|
const turn = this.buffer[idx];
|
|
59
|
+
turns.push({ user: turn.user, assistant: turn.assistant });
|
|
60
|
+
}
|
|
61
|
+
return turns;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Format the stored turns as a conversation log string.
|
|
65
|
+
* Returns empty string when the buffer is empty.
|
|
66
|
+
*/
|
|
67
|
+
getHistory() {
|
|
68
|
+
const lines = [];
|
|
69
|
+
for (const turn of this.toTurns()) {
|
|
60
70
|
lines.push(`[User]: ${turn.user}`);
|
|
61
71
|
lines.push(`[Assistant]: ${turn.assistant}`);
|
|
62
72
|
}
|