discoclaw 1.2.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/.context/voice.md +30 -2
  2. package/.env.example +6 -0
  3. package/dist/cli/dashboard.js +7 -1
  4. package/dist/config.js +7 -0
  5. package/dist/cron/executor.js +72 -1
  6. package/dist/dashboard/api/metrics.js +7 -0
  7. package/dist/dashboard/api/metrics.test.js +16 -0
  8. package/dist/dashboard/api/traces.js +14 -0
  9. package/dist/dashboard/api/traces.test.js +40 -0
  10. package/dist/dashboard/page.js +187 -8
  11. package/dist/dashboard/server.js +81 -14
  12. package/dist/dashboard/server.test.js +120 -4
  13. package/dist/discord/deferred-runner.js +306 -219
  14. package/dist/discord/message-coordinator.js +1 -28
  15. package/dist/discord/reaction-handler.js +81 -3
  16. package/dist/index.js +15 -1
  17. package/dist/observability/trace-store.js +56 -0
  18. package/dist/observability/trace-utils.js +31 -0
  19. package/dist/runtime/codex-cli.js +3 -2
  20. package/dist/runtime/codex-cli.test.js +33 -0
  21. package/dist/runtime/model-tiers.js +1 -1
  22. package/dist/runtime/model-tiers.test.js +9 -0
  23. package/dist/runtime/openai-tool-schemas.js +17 -0
  24. package/dist/voice/audio-pipeline.js +246 -6
  25. package/dist/voice/audio-pipeline.test.js +481 -0
  26. package/dist/voice/audio-receiver.js +8 -0
  27. package/dist/voice/audio-receiver.test.js +16 -0
  28. package/dist/voice/conversation-buffer.js +16 -6
  29. package/dist/voice/providers/gemini-live-provider.js +481 -0
  30. package/dist/voice/providers/gemini-live-provider.test.js +834 -0
  31. package/dist/voice/providers/gemini-live-responder.js +267 -0
  32. package/dist/voice/providers/gemini-live-responder.test.js +615 -0
  33. package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
  34. package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
  35. package/dist/voice/providers/gemini-live-types.js +32 -0
  36. package/dist/voice/providers/gemini-tool-mapper.js +91 -0
  37. package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
  38. package/dist/voice/providers/index.js +3 -0
  39. package/dist/voice/types.test.js +6 -0
  40. package/dist/voice/voice-prompt-builder.js +26 -17
  41. package/dist/voice/voice-prompt-builder.test.js +16 -1
  42. package/package.json +1 -1
@@ -56,6 +56,62 @@ vi.mock('@discordjs/voice', () => ({
56
56
  }),
57
57
  createAudioResource: vi.fn(() => ({ type: 'mock-resource' })),
58
58
  }));
59
+ // ---------------------------------------------------------------------------
60
+ // Mock Gemini Live providers
61
+ // ---------------------------------------------------------------------------
62
+ let mockGeminiProvider;
63
+ let mockGeminiResponder;
64
+ vi.mock('./providers/gemini-live-provider.js', () => ({
65
+ GeminiLiveProvider: vi.fn().mockImplementation(() => {
66
+ mockGeminiProvider = {
67
+ connect: vi.fn(async () => { }),
68
+ disconnect: vi.fn(async () => { }),
69
+ sendAudio: vi.fn(),
70
+ sendAudioStreamEnd: vi.fn(),
71
+ sendInitialHistory: vi.fn(),
72
+ sendToolResponse: vi.fn(),
73
+ onEvent: vi.fn(),
74
+ state: 'open',
75
+ };
76
+ return mockGeminiProvider;
77
+ }),
78
+ }));
79
+ vi.mock('./providers/gemini-live-responder.js', () => ({
80
+ GeminiLiveResponder: vi.fn().mockImplementation(() => {
81
+ mockGeminiResponder = {
82
+ start: vi.fn(),
83
+ stop: vi.fn(),
84
+ destroy: vi.fn(),
85
+ };
86
+ return mockGeminiResponder;
87
+ }),
88
+ }));
89
+ // ---------------------------------------------------------------------------
90
+ // Mock tool execution
91
+ // ---------------------------------------------------------------------------
92
+ const mockExecuteToolCall = vi.fn(async () => ({ result: 'ok', ok: true }));
93
+ vi.mock('../runtime/openai-tool-exec.js', () => ({
94
+ executeToolCall: (...args) => mockExecuteToolCall(...args),
95
+ }));
96
+ vi.mock('../runtime/openai-tool-schemas.js', () => ({
97
+ OPENAI_TO_DISCO_NAME: {
98
+ Read: 'Read',
99
+ Bash: 'Bash',
100
+ MemoryQuery: 'MemoryQuery',
101
+ read_file: 'Read',
102
+ bash: 'Bash',
103
+ },
104
+ buildGeminiToolDeclarations: vi.fn((enabledTools, opts) => ({
105
+ functionDeclarations: enabledTools.map((name) => ({
106
+ name,
107
+ ...(opts?.nonBlocking ? { behavior: 'NON_BLOCKING' } : {}),
108
+ })),
109
+ })),
110
+ buildToolSchemas: vi.fn((enabledTools) => enabledTools.map((name) => ({
111
+ type: 'function',
112
+ function: { name, description: `${name} tool`, parameters: {} },
113
+ }))),
114
+ }));
59
115
  // We don't want real stt-factory or audio-receiver internals — the pipeline
60
116
  // injects a createStt override and AudioReceiver is tested separately.
61
117
  // However we do import AudioReceiver for real so the wiring is exercised.
@@ -148,6 +204,8 @@ function createPipelineOpts(overrides = {}) {
148
204
  // ---------------------------------------------------------------------------
149
205
  beforeEach(() => {
150
206
  vi.clearAllMocks();
207
+ mockExecuteToolCall.mockReset();
208
+ mockExecuteToolCall.mockResolvedValue({ result: 'ok', ok: true });
151
209
  lastMockPlayer = null;
152
210
  });
153
211
  describe('AudioPipelineManager', () => {
@@ -616,4 +674,427 @@ describe('AudioPipelineManager', () => {
616
674
  expect(() => stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 })).not.toThrow();
617
675
  });
618
676
  });
677
+ describe('gemini-live mode', () => {
678
+ function createGeminiOpts(overrides = {}) {
679
+ return {
680
+ log: createLogger(),
681
+ voiceConfig: baseVoiceConfig(),
682
+ allowedUserIds: new Set(['111']),
683
+ createDecoder: () => createMockDecoder(),
684
+ voiceProvider: 'gemini-live',
685
+ geminiApiKey: 'test-gemini-key',
686
+ ...overrides,
687
+ };
688
+ }
689
+ it('creates GeminiLiveProvider and GeminiLiveResponder, skipping STT/TTS', async () => {
690
+ const opts = createGeminiOpts();
691
+ const mgr = new AudioPipelineManager(opts);
692
+ const { connection } = createMockConnection();
693
+ await mgr.startPipeline('g1', connection);
694
+ expect(mgr.hasPipeline('g1')).toBe(true);
695
+ expect(mockGeminiProvider.connect).toHaveBeenCalled();
696
+ expect(mockGeminiResponder.start).toHaveBeenCalled();
697
+ });
698
+ it('passes built systemInstruction into GeminiLiveProvider setup', async () => {
699
+ const buildGeminiSystemInstruction = vi.fn(async () => 'voice system instruction');
700
+ const opts = createGeminiOpts({ buildGeminiSystemInstruction });
701
+ const mgr = new AudioPipelineManager(opts);
702
+ const { connection } = createMockConnection();
703
+ const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
704
+ await mgr.startPipeline('g1', connection);
705
+ expect(buildGeminiSystemInstruction).toHaveBeenCalled();
706
+ const providerCalls = ProviderMock.mock.calls;
707
+ expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
708
+ systemInstruction: 'voice system instruction',
709
+ }));
710
+ });
711
+ it('backfills and seeds initial history into Gemini Live before audio starts', async () => {
712
+ const backfill = vi.fn(async () => [
713
+ { user: 'first user', assistant: 'first reply' },
714
+ { user: 'second user', assistant: 'second reply' },
715
+ ]);
716
+ const opts = createGeminiOpts({ backfill });
717
+ const mgr = new AudioPipelineManager(opts);
718
+ const { connection } = createMockConnection();
719
+ const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
720
+ await mgr.startPipeline('g1', connection);
721
+ const providerCalls = ProviderMock.mock.calls;
722
+ expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
723
+ initialHistoryInClientContent: true,
724
+ }));
725
+ expect(mockGeminiProvider.sendInitialHistory).toHaveBeenCalledWith([
726
+ { role: 'user', parts: [{ text: 'first user' }] },
727
+ { role: 'model', parts: [{ text: 'first reply' }] },
728
+ { role: 'user', parts: [{ text: 'second user' }] },
729
+ { role: 'model', parts: [{ text: 'second reply' }] },
730
+ ]);
731
+ });
732
+ it('uses synchronous tool declarations for the default 3.1 live model', async () => {
733
+ const opts = createGeminiOpts({ enabledTools: ['Read', 'Bash'] });
734
+ const mgr = new AudioPipelineManager(opts);
735
+ const { connection } = createMockConnection();
736
+ const toolSchemas = await import('../runtime/openai-tool-schemas.js');
737
+ const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
738
+ await mgr.startPipeline('g1', connection);
739
+ expect(toolSchemas.buildGeminiToolDeclarations).toHaveBeenCalledWith(['Read', 'Bash'], { nonBlocking: false });
740
+ const providerCalls = ProviderMock.mock.calls;
741
+ expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
742
+ model: 'gemini-3.1-flash-live-preview',
743
+ }));
744
+ });
745
+ it('preserves NON_BLOCKING tool declarations for explicit 2.5 live models', async () => {
746
+ const opts = createGeminiOpts({
747
+ enabledTools: ['Read'],
748
+ runtimeModel: 'gemini-2.5-flash-live-preview',
749
+ });
750
+ const mgr = new AudioPipelineManager(opts);
751
+ const { connection } = createMockConnection();
752
+ const toolSchemas = await import('../runtime/openai-tool-schemas.js');
753
+ const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
754
+ await mgr.startPipeline('g1', connection);
755
+ expect(toolSchemas.buildGeminiToolDeclarations).toHaveBeenCalledWith(['Read'], { nonBlocking: true });
756
+ const providerCalls = ProviderMock.mock.calls;
757
+ expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
758
+ model: 'gemini-2.5-flash-live-preview',
759
+ }));
760
+ });
761
+ it('calls provider.disconnect() and responder.destroy() on stopPipeline', async () => {
762
+ const opts = createGeminiOpts();
763
+ const mgr = new AudioPipelineManager(opts);
764
+ const { connection } = createMockConnection();
765
+ await mgr.startPipeline('g1', connection);
766
+ await mgr.stopPipeline('g1');
767
+ expect(mockGeminiResponder.destroy).toHaveBeenCalled();
768
+ expect(mockGeminiProvider.disconnect).toHaveBeenCalled();
769
+ expect(mgr.hasPipeline('g1')).toBe(false);
770
+ });
771
+ it('shim feedAudio bridges to provider.sendAudio', async () => {
772
+ const opts = createGeminiOpts();
773
+ const mgr = new AudioPipelineManager(opts);
774
+ const { connection, speakingEmitter, streams } = createMockConnection();
775
+ await mgr.startPipeline('g1', connection);
776
+ // Simulate a user speaking — trigger the receiver to subscribe
777
+ speakingEmitter.emit('start', '111');
778
+ // Feed a packet through the stream to exercise the shim
779
+ const stream = streams.get('111');
780
+ if (stream) {
781
+ stream.emit('data', Buffer.alloc(80));
782
+ }
783
+ // Allow async processing
784
+ await new Promise((r) => setTimeout(r, 20));
785
+ // The shim feedAudio calls provider.sendAudio
786
+ expect(mockGeminiProvider.sendAudio).toHaveBeenCalled();
787
+ });
788
+ it('shim swallows sendAudio errors without crashing', async () => {
789
+ const log = createLogger();
790
+ const opts = createGeminiOpts({ log });
791
+ const mgr = new AudioPipelineManager(opts);
792
+ const { connection, speakingEmitter, streams } = createMockConnection();
793
+ await mgr.startPipeline('g1', connection);
794
+ // Make sendAudio throw
795
+ mockGeminiProvider.sendAudio.mockImplementation(() => {
796
+ throw new Error('WebSocket not open');
797
+ });
798
+ // Simulate a user speaking
799
+ speakingEmitter.emit('start', '111');
800
+ const stream = streams.get('111');
801
+ if (stream) {
802
+ stream.emit('data', Buffer.alloc(80));
803
+ }
804
+ await new Promise((r) => setTimeout(r, 20));
805
+ // Should have logged a warning but not thrown
806
+ expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: sendAudio error (non-fatal)');
807
+ });
808
+ it('signals audioStreamEnd when a user speaking burst ends', async () => {
809
+ const opts = createGeminiOpts();
810
+ const mgr = new AudioPipelineManager(opts);
811
+ const { connection, speakingEmitter, streams } = createMockConnection();
812
+ await mgr.startPipeline('g1', connection);
813
+ speakingEmitter.emit('start', '111');
814
+ streams.get('111').emit('end');
815
+ expect(mockGeminiProvider.sendAudioStreamEnd).toHaveBeenCalled();
816
+ });
817
+ it('throws when geminiApiKey is missing', async () => {
818
+ const log = createLogger();
819
+ const opts = createGeminiOpts({ geminiApiKey: undefined, log });
820
+ const mgr = new AudioPipelineManager(opts);
821
+ const { connection } = createMockConnection();
822
+ await mgr.startPipeline('g1', connection);
823
+ expect(mgr.hasPipeline('g1')).toBe(false);
824
+ expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'failed to start audio pipeline');
825
+ });
826
+ it('default voiceProvider unset uses standard pipeline path', async () => {
827
+ // No voiceProvider set — should use the normal STT path
828
+ const mockStt = createMockStt();
829
+ const opts = createPipelineOpts({ createStt: () => mockStt });
830
+ const mgr = new AudioPipelineManager(opts);
831
+ const { connection } = createMockConnection();
832
+ await mgr.startPipeline('g1', connection);
833
+ expect(mockStt.start).toHaveBeenCalled();
834
+ expect(mgr.hasPipeline('g1')).toBe(true);
835
+ });
836
+ it('wires onBotResponse to transcriptMirror.postBotResponse', async () => {
837
+ const mirror = {
838
+ postUserTranscription: vi.fn(async () => { }),
839
+ postBotResponse: vi.fn(async () => { }),
840
+ };
841
+ const opts = createGeminiOpts({
842
+ transcriptMirror: mirror,
843
+ botDisplayName: 'GeminiBot',
844
+ });
845
+ const mgr = new AudioPipelineManager(opts);
846
+ const { connection } = createMockConnection();
847
+ // Access the GeminiLiveResponder constructor mock to check the onBotResponse option
848
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
849
+ await mgr.startPipeline('g1', connection);
850
+ // Extract the onBotResponse callback passed to GeminiLiveResponder
851
+ const constructorCalls = ResponderMock.mock.calls;
852
+ const lastCall = constructorCalls[constructorCalls.length - 1];
853
+ const responderOpts = lastCall[0];
854
+ expect(responderOpts.onBotResponse).toBeDefined();
855
+ responderOpts.onBotResponse('Hello from Gemini');
856
+ await vi.waitFor(() => {
857
+ expect(mirror.postBotResponse).toHaveBeenCalledWith('GeminiBot', 'Hello from Gemini');
858
+ });
859
+ });
860
+ it('records completed Gemini Live turns in the local conversation buffer', async () => {
861
+ const backfill = vi.fn(async () => []);
862
+ const opts = createGeminiOpts({ backfill });
863
+ const mgr = new AudioPipelineManager(opts);
864
+ const { connection } = createMockConnection();
865
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
866
+ await mgr.startPipeline('g1', connection);
867
+ const constructorCalls = ResponderMock.mock.calls;
868
+ const lastCall = constructorCalls[constructorCalls.length - 1];
869
+ const responderOpts = lastCall[0];
870
+ responderOpts.onInputTranscript?.('hello there');
871
+ responderOpts.onBotResponse?.('general kenobi');
872
+ expect(backfill).toHaveBeenCalled();
873
+ const pipeline = mgr.pipelines.get('g1');
874
+ expect(pipeline?.buffer?.getHistory()).toContain('[User]: hello there');
875
+ expect(pipeline?.buffer?.getHistory()).toContain('[Assistant]: general kenobi');
876
+ });
877
+ it('wires onInputTranscript to transcriptMirror.postUserTranscription', async () => {
878
+ const mirror = {
879
+ postUserTranscription: vi.fn(async () => { }),
880
+ postBotResponse: vi.fn(async () => { }),
881
+ };
882
+ const opts = createGeminiOpts({
883
+ transcriptMirror: mirror,
884
+ });
885
+ const mgr = new AudioPipelineManager(opts);
886
+ const { connection } = createMockConnection();
887
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
888
+ await mgr.startPipeline('g1', connection);
889
+ // Extract the onInputTranscript callback passed to GeminiLiveResponder
890
+ const constructorCalls = ResponderMock.mock.calls;
891
+ const lastCall = constructorCalls[constructorCalls.length - 1];
892
+ const responderOpts = lastCall[0];
893
+ expect(responderOpts.onInputTranscript).toBeDefined();
894
+ responderOpts.onInputTranscript('hello from user');
895
+ await vi.waitFor(() => {
896
+ expect(mirror.postUserTranscription).toHaveBeenCalledWith('User', 'hello from user');
897
+ });
898
+ });
899
+ // -----------------------------------------------------------------------
900
+ // Tool call dispatch tests
901
+ // -----------------------------------------------------------------------
902
+ /** Helper: extract the onToolCall callback from the GeminiLiveResponder constructor mock. */
903
+ async function extractOnToolCall(overrides = {}) {
904
+ const opts = createGeminiOpts({
905
+ enabledTools: ['Read', 'Bash'],
906
+ runtimeCwd: '/fake/cwd',
907
+ ...overrides,
908
+ });
909
+ const mgr = new AudioPipelineManager(opts);
910
+ const { connection } = createMockConnection();
911
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
912
+ await mgr.startPipeline('g1', connection);
913
+ const constructorCalls = ResponderMock.mock.calls;
914
+ const lastCall = constructorCalls[constructorCalls.length - 1];
915
+ const responderOpts = lastCall[0];
916
+ return responderOpts.onToolCall;
917
+ }
918
+ it('dispatches tool call to executor and sends response back via provider', async () => {
919
+ mockExecuteToolCall.mockResolvedValueOnce({ result: 'file contents here', ok: true });
920
+ const onToolCall = await extractOnToolCall();
921
+ onToolCall([{ id: 'tc-1', name: 'Read', args: { file_path: '/foo.txt' } }]);
922
+ await vi.waitFor(() => {
923
+ expect(mockExecuteToolCall).toHaveBeenCalledWith('Read', { file_path: '/foo.txt' }, ['/fake/cwd'], expect.any(Function), expect.objectContaining({ enableHybridPipeline: false }));
924
+ expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
925
+ { id: 'tc-1', name: 'Read', output: 'file contents here' },
926
+ ]);
927
+ });
928
+ });
929
+ it('returns error result string to Gemini when executor throws', async () => {
930
+ mockExecuteToolCall.mockRejectedValueOnce(new Error('permission denied'));
931
+ const onToolCall = await extractOnToolCall();
932
+ onToolCall([{ id: 'tc-err', name: 'Read', args: {} }]);
933
+ await vi.waitFor(() => {
934
+ expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
935
+ { id: 'tc-err', name: 'Read', output: 'Error: permission denied' },
936
+ ]);
937
+ });
938
+ });
939
+ it('returns error for unknown tool name (executor rejects non-allowlisted tool)', async () => {
940
+ mockExecuteToolCall.mockResolvedValueOnce({ result: 'Tool not allowed: UnknownTool', ok: false });
941
+ const onToolCall = await extractOnToolCall();
942
+ onToolCall([{ id: 'tc-unk', name: 'UnknownTool', args: {} }]);
943
+ await vi.waitFor(() => {
944
+ expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
945
+ { id: 'tc-unk', name: 'UnknownTool', output: 'Tool not allowed: UnknownTool' },
946
+ ]);
947
+ });
948
+ });
949
+ it('dispatches multiple function calls and sends all responses in one sendToolResponse', async () => {
950
+ mockExecuteToolCall
951
+ .mockResolvedValueOnce({ result: 'result-A', ok: true })
952
+ .mockResolvedValueOnce({ result: 'result-B', ok: true });
953
+ const onToolCall = await extractOnToolCall();
954
+ onToolCall([
955
+ { id: 'tc-a', name: 'Read', args: { file_path: '/a.txt' } },
956
+ { id: 'tc-b', name: 'Bash', args: { command: 'ls' } },
957
+ ]);
958
+ await vi.waitFor(() => {
959
+ expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
960
+ expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
961
+ { id: 'tc-a', name: 'Read', output: 'result-A' },
962
+ { id: 'tc-b', name: 'Bash', output: 'result-B' },
963
+ ]);
964
+ });
965
+ });
966
+ it('catches and logs sendToolResponse throw without crashing', async () => {
967
+ mockExecuteToolCall.mockResolvedValueOnce({ result: 'ok', ok: true });
968
+ const log = createLogger();
969
+ const onToolCall = await extractOnToolCall({ log });
970
+ mockGeminiProvider.sendToolResponse.mockImplementation(() => {
971
+ throw new Error('WebSocket closed');
972
+ });
973
+ onToolCall([{ id: 'tc-disc', name: 'Read', args: {} }]);
974
+ await vi.waitFor(() => {
975
+ expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: sendToolResponse failed (provider likely disconnected)');
976
+ });
977
+ });
978
+ // -----------------------------------------------------------------------
979
+ // Scheduled tool responses
980
+ // -----------------------------------------------------------------------
981
+ it('ignores silent tool scheduling on the default 3.1 live model', async () => {
982
+ mockExecuteToolCall.mockResolvedValue({ result: 'memory contents', ok: true });
983
+ const log = createLogger();
984
+ const opts = createGeminiOpts({
985
+ log,
986
+ enabledTools: ['Read', 'MemoryQuery'],
987
+ silentTools: ['MemoryQuery'],
988
+ runtimeCwd: '/fake/cwd',
989
+ });
990
+ const mgr = new AudioPipelineManager(opts);
991
+ const { connection } = createMockConnection();
992
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
993
+ await mgr.startPipeline('g1', connection);
994
+ const constructorCalls = ResponderMock.mock.calls;
995
+ const lastCall = constructorCalls[constructorCalls.length - 1];
996
+ const responderOpts = lastCall[0];
997
+ // Dispatch a SILENT tool call
998
+ responderOpts.onToolCall([{ id: 'tc-silent', name: 'MemoryQuery', args: { key: 'test' } }]);
999
+ await vi.waitFor(() => {
1000
+ expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
1001
+ { id: 'tc-silent', name: 'MemoryQuery', output: 'memory contents' },
1002
+ ]);
1003
+ });
1004
+ expect(log.info).toHaveBeenCalledWith(expect.objectContaining({
1005
+ guildId: 'g1',
1006
+ model: 'gemini-3.1-flash-live-preview',
1007
+ count: 1,
1008
+ }), 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
1009
+ });
1010
+ it('sends SILENT-scheduled tool responses for explicit 2.5 live models', async () => {
1011
+ mockExecuteToolCall
1012
+ .mockResolvedValueOnce({ result: 'memory data', ok: true });
1013
+ const log = createLogger();
1014
+ const opts = createGeminiOpts({
1015
+ log,
1016
+ enabledTools: ['MemoryQuery'],
1017
+ silentTools: ['MemoryQuery'],
1018
+ runtimeCwd: '/fake/cwd',
1019
+ runtimeModel: 'gemini-2.5-flash-live-preview',
1020
+ });
1021
+ const mgr = new AudioPipelineManager(opts);
1022
+ const { connection } = createMockConnection();
1023
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
1024
+ await mgr.startPipeline('g1', connection);
1025
+ const constructorCalls = ResponderMock.mock.calls;
1026
+ const lastCall = constructorCalls[constructorCalls.length - 1];
1027
+ const responderOpts = lastCall[0];
1028
+ responderOpts.onToolCall([
1029
+ { id: 'tc-mem', name: 'MemoryQuery', args: { key: 'test' } },
1030
+ ]);
1031
+ await vi.waitFor(() => {
1032
+ expect(mockExecuteToolCall).toHaveBeenCalledTimes(1);
1033
+ expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1', count: 1 }), 'gemini-live: SILENT tool execution complete — results scheduled silently');
1034
+ expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
1035
+ { id: 'tc-mem', name: 'MemoryQuery', output: 'memory data', scheduling: 'SILENT' },
1036
+ ]);
1037
+ });
1038
+ });
1039
+ // -----------------------------------------------------------------------
1040
+ // Fallback to standard pipeline
1041
+ // -----------------------------------------------------------------------
1042
+ it('does not fall back when initial gemini-live connection fails (fallback disabled)', async () => {
1043
+ const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
1044
+ // Make the next provider's connect() reject
1045
+ ProviderMock.mockImplementationOnce(() => {
1046
+ mockGeminiProvider = {
1047
+ connect: vi.fn(async () => { throw new Error('connection refused'); }),
1048
+ disconnect: vi.fn(async () => { }),
1049
+ sendAudio: vi.fn(),
1050
+ sendAudioStreamEnd: vi.fn(),
1051
+ sendInitialHistory: vi.fn(),
1052
+ sendToolResponse: vi.fn(),
1053
+ onEvent: vi.fn(),
1054
+ state: 'idle',
1055
+ };
1056
+ return mockGeminiProvider;
1057
+ });
1058
+ const log = createLogger();
1059
+ const opts = createGeminiOpts({ log });
1060
+ const mgr = new AudioPipelineManager(opts);
1061
+ const { connection } = createMockConnection();
1062
+ await mgr.startPipeline('g1', connection);
1063
+ expect(mgr.hasPipeline('g1')).toBe(false);
1064
+ expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: connection failed — no fallback (fallback disabled)');
1065
+ });
1066
+ it('does not fall back when onSessionTerminated is triggered (fallback disabled)', async () => {
1067
+ const log = createLogger();
1068
+ const opts = createGeminiOpts({ log });
1069
+ const mgr = new AudioPipelineManager(opts);
1070
+ const { connection } = createMockConnection();
1071
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
1072
+ await mgr.startPipeline('g1', connection);
1073
+ expect(mgr.pipelineMode('g1')).toBe('gemini-live');
1074
+ // Extract onSessionTerminated callback
1075
+ const constructorCalls = ResponderMock.mock.calls;
1076
+ const lastCall = constructorCalls[constructorCalls.length - 1];
1077
+ const responderOpts = lastCall[0];
1078
+ expect(responderOpts.onSessionTerminated).toBeDefined();
1079
+ responderOpts.onSessionTerminated();
1080
+ // Pipeline should NOT switch to standard mode — it stays as gemini-live (or gets stopped)
1081
+ expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live session terminally failed — no fallback (fallback disabled)');
1082
+ });
1083
+ it('does not fall back when onFallbackRecommended is triggered (fallback disabled)', async () => {
1084
+ const log = createLogger();
1085
+ const opts = createGeminiOpts({ log });
1086
+ const mgr = new AudioPipelineManager(opts);
1087
+ const { connection } = createMockConnection();
1088
+ const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
1089
+ await mgr.startPipeline('g1', connection);
1090
+ // Extract onFallbackRecommended callback
1091
+ const constructorCalls = ResponderMock.mock.calls;
1092
+ const lastCall = constructorCalls[constructorCalls.length - 1];
1093
+ const responderOpts = lastCall[0];
1094
+ expect(responderOpts.onFallbackRecommended).toBeDefined();
1095
+ responderOpts.onFallbackRecommended('exhausted reconnect retries');
1096
+ // Pipeline should NOT switch to standard mode
1097
+ expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1', reason: 'exhausted reconnect retries' }), 'gemini-live: fallback recommended but fallback is disabled');
1098
+ });
1099
+ });
619
1100
  });
@@ -22,6 +22,7 @@ export class AudioReceiver {
22
22
  log;
23
23
  createDecoder;
24
24
  onUserSpeaking;
25
+ onUserSilence;
25
26
  decoders = new Map();
26
27
  running = false;
27
28
  constructor(opts) {
@@ -31,6 +32,7 @@ export class AudioReceiver {
31
32
  this.log = opts.log;
32
33
  this.createDecoder = opts.createDecoder;
33
34
  this.onUserSpeaking = opts.onUserSpeaking;
35
+ this.onUserSilence = opts.onUserSilence;
34
36
  }
35
37
  /** Begin listening for audio from allowlisted users. */
36
38
  start() {
@@ -122,6 +124,12 @@ export class AudioReceiver {
122
124
  decoder.destroy();
123
125
  this.decoders.delete(userId);
124
126
  this.log.info({ userId }, 'cleaned up user audio decoder');
127
+ try {
128
+ this.onUserSilence?.(userId);
129
+ }
130
+ catch (err) {
131
+ this.log.error({ err, userId }, 'onUserSilence callback error');
132
+ }
125
133
  }
126
134
  }
127
135
  }
@@ -224,6 +224,22 @@ describe('AudioReceiver', () => {
224
224
  expect(decoder.destroy).toHaveBeenCalled();
225
225
  expect(log.info).toHaveBeenCalledWith({ userId: '111' }, 'cleaned up user audio decoder');
226
226
  });
227
+ it('calls onUserSilence when a speaking burst ends', () => {
228
+ const { connection, speakingEmitter, streams } = createMockConnection();
229
+ const onUserSilence = vi.fn();
230
+ const recv = new AudioReceiver({
231
+ connection,
232
+ allowedUserIds: new Set(['111']),
233
+ sttProvider: createMockStt(),
234
+ log: createLogger(),
235
+ createDecoder: createMockDecoder,
236
+ onUserSilence,
237
+ });
238
+ recv.start();
239
+ speakingEmitter.emit('start', '111');
240
+ streams.get('111').emit('end');
241
+ expect(onUserSilence).toHaveBeenCalledWith('111');
242
+ });
227
243
  it('cleans up decoder on stream error', () => {
228
244
  const { connection, speakingEmitter, streams } = createMockConnection();
229
245
  const decoder = createMockDecoder();
@@ -45,18 +45,28 @@ export class ConversationBuffer {
45
45
  }
46
46
  }
47
47
  /**
48
- * Format the stored turns as a conversation log string.
49
- * Returns empty string when the buffer is empty.
48
+ * Return the stored turns from oldest to newest.
49
+ * Useful for replaying history into providers that support explicit seeding.
50
50
  */
51
- getHistory() {
51
+ toTurns() {
52
52
  if (this.count === 0)
53
- return '';
54
- const lines = [];
55
- // Read from oldest to newest.
53
+ return [];
54
+ const turns = [];
56
55
  const start = this.count < CAPACITY ? 0 : this.head;
57
56
  for (let i = 0; i < this.count; i++) {
58
57
  const idx = (start + i) % CAPACITY;
59
58
  const turn = this.buffer[idx];
59
+ turns.push({ user: turn.user, assistant: turn.assistant });
60
+ }
61
+ return turns;
62
+ }
63
+ /**
64
+ * Format the stored turns as a conversation log string.
65
+ * Returns empty string when the buffer is empty.
66
+ */
67
+ getHistory() {
68
+ const lines = [];
69
+ for (const turn of this.toTurns()) {
60
70
  lines.push(`[User]: ${turn.user}`);
61
71
  lines.push(`[Assistant]: ${turn.assistant}`);
62
72
  }