@livekit/agents 1.0.45 → 1.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +14 -20
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +14 -20
- package/dist/cli.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +14 -5
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +14 -5
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/llm/chat_context.cjs +19 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +4 -0
- package/dist/llm/chat_context.d.ts +4 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +19 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/provider_format/index.cjs +2 -0
- package/dist/llm/provider_format/index.cjs.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/index.js +6 -1
- package/dist/llm/provider_format/index.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +82 -2
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.cts +1 -0
- package/dist/llm/provider_format/openai.d.ts +1 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +80 -1
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +326 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +327 -1
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +4 -3
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +4 -3
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js.map +1 -1
- package/dist/log.cjs +5 -2
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +5 -2
- package/dist/log.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +15 -6
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +15 -6
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stream/index.cjs +3 -0
- package/dist/stream/index.cjs.map +1 -1
- package/dist/stream/index.d.cts +1 -0
- package/dist/stream/index.d.ts +1 -0
- package/dist/stream/index.d.ts.map +1 -1
- package/dist/stream/index.js +2 -0
- package/dist/stream/index.js.map +1 -1
- package/dist/stream/multi_input_stream.cjs +139 -0
- package/dist/stream/multi_input_stream.cjs.map +1 -0
- package/dist/stream/multi_input_stream.d.cts +55 -0
- package/dist/stream/multi_input_stream.d.ts +55 -0
- package/dist/stream/multi_input_stream.d.ts.map +1 -0
- package/dist/stream/multi_input_stream.js +115 -0
- package/dist/stream/multi_input_stream.js.map +1 -0
- package/dist/stream/multi_input_stream.test.cjs +340 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -0
- package/dist/stream/multi_input_stream.test.js +339 -0
- package/dist/stream/multi_input_stream.test.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +42 -0
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +14 -0
- package/dist/telemetry/trace_types.d.ts +14 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +28 -0
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/utils.cjs +44 -2
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +44 -2
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +71 -0
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +71 -0
- package/dist/utils.test.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.cjs.map +1 -1
- package/dist/version.d.cts +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +1 -1
- package/dist/version.js.map +1 -1
- package/dist/voice/agent.cjs +144 -12
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +29 -4
- package/dist/voice/agent.d.ts +29 -4
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +140 -11
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +120 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +122 -2
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +402 -292
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +35 -7
- package/dist/voice/agent_activity.d.ts +35 -7
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +402 -287
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +156 -44
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +22 -9
- package/dist/voice/agent_session.d.ts +22 -9
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +156 -44
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +89 -36
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +22 -1
- package/dist/voice/audio_recognition.d.ts +22 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +93 -36
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +233 -0
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
- package/dist/voice/audio_recognition_span.test.js +232 -0
- package/dist/voice/audio_recognition_span.test.js.map +1 -0
- package/dist/voice/generation.cjs +39 -19
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +44 -20
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +2 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs +6 -3
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +3 -2
- package/dist/voice/io.d.ts +3 -2
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +6 -3
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +3 -1
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +3 -1
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +17 -17
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +2 -2
- package/dist/voice/room_io/_input.d.ts +2 -2
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +7 -6
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +9 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +9 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +7 -1
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +2 -0
- package/dist/voice/speech_handle.d.ts +2 -0
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +8 -2
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/run_result.cjs +66 -15
- package/dist/voice/testing/run_result.cjs.map +1 -1
- package/dist/voice/testing/run_result.d.cts +14 -3
- package/dist/voice/testing/run_result.d.ts +14 -3
- package/dist/voice/testing/run_result.d.ts.map +1 -1
- package/dist/voice/testing/run_result.js +66 -15
- package/dist/voice/testing/run_result.js.map +1 -1
- package/dist/voice/utils.cjs +47 -0
- package/dist/voice/utils.cjs.map +1 -0
- package/dist/voice/utils.d.cts +4 -0
- package/dist/voice/utils.d.ts +4 -0
- package/dist/voice/utils.d.ts.map +1 -0
- package/dist/voice/utils.js +23 -0
- package/dist/voice/utils.js.map +1 -0
- package/package.json +1 -1
- package/src/cli.ts +20 -33
- package/src/ipc/job_proc_lazy_main.ts +16 -5
- package/src/llm/chat_context.ts +35 -0
- package/src/llm/provider_format/index.ts +7 -2
- package/src/llm/provider_format/openai.test.ts +385 -1
- package/src/llm/provider_format/openai.ts +103 -0
- package/src/llm/provider_format/utils.ts +6 -4
- package/src/llm/realtime.ts +1 -0
- package/src/log.ts +5 -2
- package/src/stream/deferred_stream.ts +17 -6
- package/src/stream/index.ts +1 -0
- package/src/stream/multi_input_stream.test.ts +540 -0
- package/src/stream/multi_input_stream.ts +172 -0
- package/src/telemetry/trace_types.ts +18 -0
- package/src/utils.test.ts +87 -0
- package/src/utils.ts +52 -2
- package/src/version.ts +1 -1
- package/src/voice/agent.test.ts +140 -2
- package/src/voice/agent.ts +189 -10
- package/src/voice/agent_activity.ts +449 -286
- package/src/voice/agent_session.ts +195 -51
- package/src/voice/audio_recognition.ts +118 -38
- package/src/voice/audio_recognition_span.test.ts +261 -0
- package/src/voice/generation.ts +52 -23
- package/src/voice/index.ts +1 -1
- package/src/voice/io.ts +7 -4
- package/src/voice/recorder_io/recorder_io.ts +2 -1
- package/src/voice/room_io/_input.ts +11 -7
- package/src/voice/room_io/room_io.ts +12 -0
- package/src/voice/speech_handle.ts +9 -2
- package/src/voice/testing/run_result.ts +81 -23
- package/src/voice/utils.ts +29 -0
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
FunctionCallOutput,
|
|
12
12
|
} from '../chat_context.js';
|
|
13
13
|
import { serializeImage } from '../utils.js';
|
|
14
|
-
import { toChatCtx } from './openai.js';
|
|
14
|
+
import { toChatCtx, toResponsesChatCtx } from './openai.js';
|
|
15
15
|
|
|
16
16
|
// Mock the serializeImage function
|
|
17
17
|
vi.mock('../utils.js', () => ({
|
|
@@ -673,3 +673,387 @@ describe('toChatCtx', () => {
|
|
|
673
673
|
]);
|
|
674
674
|
});
|
|
675
675
|
});
|
|
676
|
+
|
|
677
|
+
describe('toResponsesChatCtx', () => {
|
|
678
|
+
const serializeImageMock = vi.mocked(serializeImage);
|
|
679
|
+
|
|
680
|
+
initializeLogger({ level: 'silent', pretty: false });
|
|
681
|
+
|
|
682
|
+
beforeEach(async () => {
|
|
683
|
+
vi.clearAllMocks();
|
|
684
|
+
});
|
|
685
|
+
|
|
686
|
+
it('should convert simple text messages', async () => {
|
|
687
|
+
const ctx = ChatContext.empty();
|
|
688
|
+
ctx.addMessage({ role: 'user', content: 'Hello' });
|
|
689
|
+
ctx.addMessage({ role: 'assistant', content: 'Hi there!' });
|
|
690
|
+
|
|
691
|
+
const result = await toResponsesChatCtx(ctx);
|
|
692
|
+
|
|
693
|
+
expect(result).toHaveLength(2);
|
|
694
|
+
expect(result[0]).toEqual({ role: 'user', content: 'Hello' });
|
|
695
|
+
expect(result[1]).toEqual({ role: 'assistant', content: 'Hi there!' });
|
|
696
|
+
});
|
|
697
|
+
|
|
698
|
+
it('should handle system messages', async () => {
|
|
699
|
+
const ctx = ChatContext.empty();
|
|
700
|
+
ctx.addMessage({ role: 'system', content: 'You are a helpful assistant' });
|
|
701
|
+
ctx.addMessage({ role: 'user', content: 'Hello' });
|
|
702
|
+
|
|
703
|
+
const result = await toResponsesChatCtx(ctx);
|
|
704
|
+
|
|
705
|
+
expect(result).toHaveLength(2);
|
|
706
|
+
expect(result[0]).toEqual({ role: 'system', content: 'You are a helpful assistant' });
|
|
707
|
+
expect(result[1]).toEqual({ role: 'user', content: 'Hello' });
|
|
708
|
+
});
|
|
709
|
+
|
|
710
|
+
it('should handle multi-line text content', async () => {
|
|
711
|
+
const ctx = ChatContext.empty();
|
|
712
|
+
ctx.addMessage({ role: 'user', content: ['Line 1', 'Line 2', 'Line 3'] });
|
|
713
|
+
|
|
714
|
+
const result = await toResponsesChatCtx(ctx);
|
|
715
|
+
|
|
716
|
+
expect(result).toHaveLength(1);
|
|
717
|
+
expect(result[0]).toEqual({ role: 'user', content: 'Line 1\nLine 2\nLine 3' });
|
|
718
|
+
});
|
|
719
|
+
|
|
720
|
+
it('should convert images to input_image format with external URL', async () => {
|
|
721
|
+
serializeImageMock.mockResolvedValue({
|
|
722
|
+
inferenceDetail: 'high',
|
|
723
|
+
externalUrl: 'https://example.com/image.jpg',
|
|
724
|
+
});
|
|
725
|
+
|
|
726
|
+
const ctx = ChatContext.empty();
|
|
727
|
+
ctx.addMessage({
|
|
728
|
+
role: 'user',
|
|
729
|
+
content: [
|
|
730
|
+
{
|
|
731
|
+
id: 'img1',
|
|
732
|
+
type: 'image_content',
|
|
733
|
+
image: 'https://example.com/image.jpg',
|
|
734
|
+
inferenceDetail: 'high',
|
|
735
|
+
_cache: {},
|
|
736
|
+
},
|
|
737
|
+
],
|
|
738
|
+
});
|
|
739
|
+
|
|
740
|
+
const result = await toResponsesChatCtx(ctx);
|
|
741
|
+
|
|
742
|
+
expect(result).toEqual([
|
|
743
|
+
{
|
|
744
|
+
role: 'user',
|
|
745
|
+
content: [
|
|
746
|
+
{
|
|
747
|
+
type: 'input_image',
|
|
748
|
+
image_url: 'https://example.com/image.jpg',
|
|
749
|
+
detail: 'high',
|
|
750
|
+
},
|
|
751
|
+
],
|
|
752
|
+
},
|
|
753
|
+
]);
|
|
754
|
+
});
|
|
755
|
+
|
|
756
|
+
it('should convert images to input_image format with base64 data', async () => {
|
|
757
|
+
serializeImageMock.mockResolvedValue({
|
|
758
|
+
inferenceDetail: 'auto',
|
|
759
|
+
mimeType: 'image/png',
|
|
760
|
+
base64Data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
|
|
761
|
+
});
|
|
762
|
+
|
|
763
|
+
const ctx = ChatContext.empty();
|
|
764
|
+
ctx.addMessage({
|
|
765
|
+
role: 'user',
|
|
766
|
+
content: [
|
|
767
|
+
{
|
|
768
|
+
id: 'img1',
|
|
769
|
+
type: 'image_content',
|
|
770
|
+
image: '',
|
|
771
|
+
inferenceDetail: 'auto',
|
|
772
|
+
_cache: {},
|
|
773
|
+
},
|
|
774
|
+
],
|
|
775
|
+
});
|
|
776
|
+
|
|
777
|
+
const result = await toResponsesChatCtx(ctx);
|
|
778
|
+
|
|
779
|
+
expect(result).toEqual([
|
|
780
|
+
{
|
|
781
|
+
role: 'user',
|
|
782
|
+
content: [
|
|
783
|
+
{
|
|
784
|
+
type: 'input_image',
|
|
785
|
+
image_url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
|
|
786
|
+
detail: 'auto',
|
|
787
|
+
},
|
|
788
|
+
],
|
|
789
|
+
},
|
|
790
|
+
]);
|
|
791
|
+
});
|
|
792
|
+
|
|
793
|
+
it('should handle mixed content with text and image using input_text', async () => {
|
|
794
|
+
serializeImageMock.mockResolvedValue({
|
|
795
|
+
inferenceDetail: 'high',
|
|
796
|
+
externalUrl: 'https://example.com/image.jpg',
|
|
797
|
+
});
|
|
798
|
+
|
|
799
|
+
const ctx = ChatContext.empty();
|
|
800
|
+
ctx.addMessage({
|
|
801
|
+
role: 'user',
|
|
802
|
+
content: [
|
|
803
|
+
'Check this out:',
|
|
804
|
+
{
|
|
805
|
+
id: 'img1',
|
|
806
|
+
type: 'image_content',
|
|
807
|
+
image: 'https://example.com/image.jpg',
|
|
808
|
+
inferenceDetail: 'high',
|
|
809
|
+
_cache: {},
|
|
810
|
+
},
|
|
811
|
+
],
|
|
812
|
+
});
|
|
813
|
+
|
|
814
|
+
const result = await toResponsesChatCtx(ctx);
|
|
815
|
+
|
|
816
|
+
expect(result).toEqual([
|
|
817
|
+
{
|
|
818
|
+
role: 'user',
|
|
819
|
+
content: [
|
|
820
|
+
{
|
|
821
|
+
type: 'input_image',
|
|
822
|
+
image_url: 'https://example.com/image.jpg',
|
|
823
|
+
detail: 'high',
|
|
824
|
+
},
|
|
825
|
+
{ type: 'input_text', text: 'Check this out:' },
|
|
826
|
+
],
|
|
827
|
+
},
|
|
828
|
+
]);
|
|
829
|
+
});
|
|
830
|
+
|
|
831
|
+
it('should handle tool calls as top-level function_call items', async () => {
|
|
832
|
+
const ctx = ChatContext.empty();
|
|
833
|
+
|
|
834
|
+
const msg = ctx.addMessage({ role: 'assistant', content: 'Let me help you.' });
|
|
835
|
+
const toolCall = FunctionCall.create({
|
|
836
|
+
id: msg.id + '/tool_1',
|
|
837
|
+
callId: 'call_123',
|
|
838
|
+
name: 'get_weather',
|
|
839
|
+
args: '{"location": "Paris"}',
|
|
840
|
+
});
|
|
841
|
+
const toolOutput = FunctionCallOutput.create({
|
|
842
|
+
callId: 'call_123',
|
|
843
|
+
output: '{"temperature": 20}',
|
|
844
|
+
isError: false,
|
|
845
|
+
});
|
|
846
|
+
|
|
847
|
+
ctx.insert([toolCall, toolOutput]);
|
|
848
|
+
|
|
849
|
+
const result = await toResponsesChatCtx(ctx);
|
|
850
|
+
|
|
851
|
+
expect(result).toEqual([
|
|
852
|
+
{ role: 'assistant', content: 'Let me help you.' },
|
|
853
|
+
{
|
|
854
|
+
type: 'function_call',
|
|
855
|
+
call_id: 'call_123',
|
|
856
|
+
name: 'get_weather',
|
|
857
|
+
arguments: '{"location": "Paris"}',
|
|
858
|
+
},
|
|
859
|
+
{
|
|
860
|
+
type: 'function_call_output',
|
|
861
|
+
call_id: 'call_123',
|
|
862
|
+
output: '{"temperature": 20}',
|
|
863
|
+
},
|
|
864
|
+
]);
|
|
865
|
+
});
|
|
866
|
+
|
|
867
|
+
it('should handle tool calls without an accompanying message', async () => {
|
|
868
|
+
const ctx = ChatContext.empty();
|
|
869
|
+
|
|
870
|
+
const toolCall = new FunctionCall({
|
|
871
|
+
id: 'func_1',
|
|
872
|
+
callId: 'call_456',
|
|
873
|
+
name: 'calculate',
|
|
874
|
+
args: '{"a": 5, "b": 3}',
|
|
875
|
+
});
|
|
876
|
+
const toolOutput = new FunctionCallOutput({
|
|
877
|
+
callId: 'call_456',
|
|
878
|
+
output: '{"result": 8}',
|
|
879
|
+
isError: false,
|
|
880
|
+
});
|
|
881
|
+
|
|
882
|
+
ctx.insert([toolCall, toolOutput]);
|
|
883
|
+
|
|
884
|
+
const result = await toResponsesChatCtx(ctx);
|
|
885
|
+
|
|
886
|
+
expect(result).toEqual([
|
|
887
|
+
{
|
|
888
|
+
type: 'function_call',
|
|
889
|
+
call_id: 'call_456',
|
|
890
|
+
name: 'calculate',
|
|
891
|
+
arguments: '{"a": 5, "b": 3}',
|
|
892
|
+
},
|
|
893
|
+
{
|
|
894
|
+
type: 'function_call_output',
|
|
895
|
+
call_id: 'call_456',
|
|
896
|
+
output: '{"result": 8}',
|
|
897
|
+
},
|
|
898
|
+
]);
|
|
899
|
+
});
|
|
900
|
+
|
|
901
|
+
it('should handle multiple tool calls as separate function_call items', async () => {
|
|
902
|
+
const ctx = ChatContext.empty();
|
|
903
|
+
|
|
904
|
+
const msg = ctx.addMessage({ role: 'assistant', content: "I'll check both." });
|
|
905
|
+
const toolCall1 = new FunctionCall({
|
|
906
|
+
id: msg.id + '/tool_1',
|
|
907
|
+
callId: 'call_1',
|
|
908
|
+
name: 'get_weather',
|
|
909
|
+
args: '{"location": "NYC"}',
|
|
910
|
+
});
|
|
911
|
+
const toolCall2 = new FunctionCall({
|
|
912
|
+
id: msg.id + '/tool_2',
|
|
913
|
+
callId: 'call_2',
|
|
914
|
+
name: 'get_weather',
|
|
915
|
+
args: '{"location": "LA"}',
|
|
916
|
+
});
|
|
917
|
+
const toolOutput1 = new FunctionCallOutput({
|
|
918
|
+
callId: 'call_1',
|
|
919
|
+
output: '{"temperature": 65}',
|
|
920
|
+
isError: false,
|
|
921
|
+
});
|
|
922
|
+
const toolOutput2 = new FunctionCallOutput({
|
|
923
|
+
callId: 'call_2',
|
|
924
|
+
output: '{"temperature": 78}',
|
|
925
|
+
isError: false,
|
|
926
|
+
});
|
|
927
|
+
|
|
928
|
+
ctx.insert([toolCall1, toolCall2, toolOutput1, toolOutput2]);
|
|
929
|
+
|
|
930
|
+
const result = await toResponsesChatCtx(ctx);
|
|
931
|
+
|
|
932
|
+
expect(result).toEqual([
|
|
933
|
+
{ role: 'assistant', content: "I'll check both." },
|
|
934
|
+
{
|
|
935
|
+
type: 'function_call',
|
|
936
|
+
call_id: 'call_1',
|
|
937
|
+
name: 'get_weather',
|
|
938
|
+
arguments: '{"location": "NYC"}',
|
|
939
|
+
},
|
|
940
|
+
{
|
|
941
|
+
type: 'function_call',
|
|
942
|
+
call_id: 'call_2',
|
|
943
|
+
name: 'get_weather',
|
|
944
|
+
arguments: '{"location": "LA"}',
|
|
945
|
+
},
|
|
946
|
+
{
|
|
947
|
+
type: 'function_call_output',
|
|
948
|
+
call_id: 'call_1',
|
|
949
|
+
output: '{"temperature": 65}',
|
|
950
|
+
},
|
|
951
|
+
{
|
|
952
|
+
type: 'function_call_output',
|
|
953
|
+
call_id: 'call_2',
|
|
954
|
+
output: '{"temperature": 78}',
|
|
955
|
+
},
|
|
956
|
+
]);
|
|
957
|
+
});
|
|
958
|
+
|
|
959
|
+
it('should skip empty groups', async () => {
|
|
960
|
+
const ctx = ChatContext.empty();
|
|
961
|
+
ctx.addMessage({ role: 'user', content: 'Hello', createdAt: 1000 });
|
|
962
|
+
|
|
963
|
+
const orphanOutput = new FunctionCallOutput({
|
|
964
|
+
callId: 'orphan_call',
|
|
965
|
+
output: 'This should be ignored',
|
|
966
|
+
isError: false,
|
|
967
|
+
createdAt: 2000,
|
|
968
|
+
});
|
|
969
|
+
ctx.insert(orphanOutput);
|
|
970
|
+
|
|
971
|
+
ctx.addMessage({ role: 'assistant', content: 'Hi!', createdAt: 3000 });
|
|
972
|
+
|
|
973
|
+
const result = await toResponsesChatCtx(ctx);
|
|
974
|
+
|
|
975
|
+
expect(result).toHaveLength(2);
|
|
976
|
+
expect(result).toContainEqual({ role: 'user', content: 'Hello' });
|
|
977
|
+
expect(result).toContainEqual({ role: 'assistant', content: 'Hi!' });
|
|
978
|
+
});
|
|
979
|
+
|
|
980
|
+
it('should filter out agent handoff items', async () => {
|
|
981
|
+
const ctx = ChatContext.empty();
|
|
982
|
+
|
|
983
|
+
ctx.addMessage({ role: 'user', content: 'Hello' });
|
|
984
|
+
ctx.insert(new AgentHandoffItem({ oldAgentId: 'agent_1', newAgentId: 'agent_2' }));
|
|
985
|
+
ctx.addMessage({ role: 'assistant', content: 'Hi there!' });
|
|
986
|
+
|
|
987
|
+
const result = await toResponsesChatCtx(ctx);
|
|
988
|
+
|
|
989
|
+
expect(result).toEqual([
|
|
990
|
+
{ role: 'user', content: 'Hello' },
|
|
991
|
+
{ role: 'assistant', content: 'Hi there!' },
|
|
992
|
+
]);
|
|
993
|
+
});
|
|
994
|
+
|
|
995
|
+
it('should cache serialized images', async () => {
|
|
996
|
+
serializeImageMock.mockResolvedValue({
|
|
997
|
+
inferenceDetail: 'high',
|
|
998
|
+
mimeType: 'image/png',
|
|
999
|
+
base64Data: 'cached-data',
|
|
1000
|
+
});
|
|
1001
|
+
|
|
1002
|
+
const imageContent = {
|
|
1003
|
+
id: 'img1',
|
|
1004
|
+
type: 'image_content' as const,
|
|
1005
|
+
image: 'https://example.com/image.jpg',
|
|
1006
|
+
inferenceDetail: 'high' as const,
|
|
1007
|
+
_cache: {},
|
|
1008
|
+
};
|
|
1009
|
+
|
|
1010
|
+
const ctx = ChatContext.empty();
|
|
1011
|
+
ctx.addMessage({ role: 'user', content: [imageContent] });
|
|
1012
|
+
|
|
1013
|
+
await toResponsesChatCtx(ctx);
|
|
1014
|
+
await toResponsesChatCtx(ctx);
|
|
1015
|
+
|
|
1016
|
+
expect(serializeImageMock).toHaveBeenCalledTimes(1);
|
|
1017
|
+
expect(imageContent._cache).toHaveProperty('serialized_image');
|
|
1018
|
+
});
|
|
1019
|
+
|
|
1020
|
+
it('should throw error for unsupported content type', async () => {
|
|
1021
|
+
const ctx = ChatContext.empty();
|
|
1022
|
+
ctx.addMessage({
|
|
1023
|
+
role: 'user',
|
|
1024
|
+
content: [
|
|
1025
|
+
{
|
|
1026
|
+
type: 'audio_content',
|
|
1027
|
+
frame: [],
|
|
1028
|
+
},
|
|
1029
|
+
],
|
|
1030
|
+
});
|
|
1031
|
+
|
|
1032
|
+
await expect(toResponsesChatCtx(ctx)).rejects.toThrow(
|
|
1033
|
+
'Unsupported content type: audio_content',
|
|
1034
|
+
);
|
|
1035
|
+
});
|
|
1036
|
+
|
|
1037
|
+
it('should throw error when serialized image has no data', async () => {
|
|
1038
|
+
serializeImageMock.mockResolvedValue({
|
|
1039
|
+
inferenceDetail: 'high',
|
|
1040
|
+
// No base64Data or externalUrl
|
|
1041
|
+
});
|
|
1042
|
+
|
|
1043
|
+
const ctx = ChatContext.empty();
|
|
1044
|
+
ctx.addMessage({
|
|
1045
|
+
role: 'user',
|
|
1046
|
+
content: [
|
|
1047
|
+
{
|
|
1048
|
+
id: 'img1',
|
|
1049
|
+
type: 'image_content',
|
|
1050
|
+
image: 'invalid-image',
|
|
1051
|
+
inferenceDetail: 'high',
|
|
1052
|
+
_cache: {},
|
|
1053
|
+
},
|
|
1054
|
+
],
|
|
1055
|
+
});
|
|
1056
|
+
|
|
1057
|
+
await expect(toResponsesChatCtx(ctx)).rejects.toThrow('Serialized image has no data bytes');
|
|
1058
|
+
});
|
|
1059
|
+
});
|
|
@@ -144,3 +144,106 @@ async function toImageContent(content: ImageContent) {
|
|
|
144
144
|
},
|
|
145
145
|
};
|
|
146
146
|
}
|
|
147
|
+
|
|
148
|
+
async function toResponsesImageContent(content: ImageContent) {
|
|
149
|
+
const cacheKey = 'serialized_image';
|
|
150
|
+
let serialized: SerializedImage;
|
|
151
|
+
|
|
152
|
+
if (content._cache[cacheKey] === undefined) {
|
|
153
|
+
serialized = await serializeImage(content);
|
|
154
|
+
content._cache[cacheKey] = serialized;
|
|
155
|
+
}
|
|
156
|
+
serialized = content._cache[cacheKey];
|
|
157
|
+
|
|
158
|
+
if (serialized.externalUrl) {
|
|
159
|
+
return {
|
|
160
|
+
type: 'input_image' as const,
|
|
161
|
+
image_url: serialized.externalUrl,
|
|
162
|
+
detail: serialized.inferenceDetail,
|
|
163
|
+
};
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
if (serialized.base64Data === undefined) {
|
|
167
|
+
throw new Error('Serialized image has no data bytes');
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
return {
|
|
171
|
+
type: 'input_image' as const,
|
|
172
|
+
image_url: `data:${serialized.mimeType};base64,${serialized.base64Data}`,
|
|
173
|
+
detail: serialized.inferenceDetail,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
export async function toResponsesChatCtx(
|
|
178
|
+
chatCtx: ChatContext,
|
|
179
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
180
|
+
injectDummyUserMessage: boolean = true,
|
|
181
|
+
) {
|
|
182
|
+
const itemGroups = groupToolCalls(chatCtx);
|
|
183
|
+
const messages: Record<string, any>[] = []; // eslint-disable-line @typescript-eslint/no-explicit-any
|
|
184
|
+
|
|
185
|
+
for (const group of itemGroups) {
|
|
186
|
+
if (group.isEmpty) continue;
|
|
187
|
+
|
|
188
|
+
if (group.message) {
|
|
189
|
+
messages.push(await toResponsesChatItem(group.message));
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
for (const toolCall of group.toolCalls) {
|
|
193
|
+
messages.push({
|
|
194
|
+
type: 'function_call',
|
|
195
|
+
call_id: toolCall.callId,
|
|
196
|
+
name: toolCall.name,
|
|
197
|
+
arguments: toolCall.args,
|
|
198
|
+
});
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
for (const toolOutput of group.toolOutputs) {
|
|
202
|
+
messages.push(await toResponsesChatItem(toolOutput));
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
return messages;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
async function toResponsesChatItem(item: ChatItem) {
|
|
210
|
+
if (item.type === 'message') {
|
|
211
|
+
const listContent: Record<string, any>[] = []; // eslint-disable-line @typescript-eslint/no-explicit-any
|
|
212
|
+
let textContent = '';
|
|
213
|
+
|
|
214
|
+
for (const content of item.content) {
|
|
215
|
+
if (typeof content === 'string') {
|
|
216
|
+
if (textContent) textContent += '\n';
|
|
217
|
+
textContent += content;
|
|
218
|
+
} else if (content.type === 'image_content') {
|
|
219
|
+
listContent.push(await toResponsesImageContent(content));
|
|
220
|
+
} else {
|
|
221
|
+
throw new Error(`Unsupported content type: ${content.type}`);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
const content =
|
|
226
|
+
listContent.length == 0
|
|
227
|
+
? textContent
|
|
228
|
+
: textContent.length == 0
|
|
229
|
+
? listContent
|
|
230
|
+
: [...listContent, { type: 'input_text', text: textContent }];
|
|
231
|
+
|
|
232
|
+
return { role: item.role, content };
|
|
233
|
+
} else if (item.type === 'function_call') {
|
|
234
|
+
return {
|
|
235
|
+
type: 'function_call',
|
|
236
|
+
call_id: item.callId,
|
|
237
|
+
name: item.name,
|
|
238
|
+
arguments: item.args,
|
|
239
|
+
};
|
|
240
|
+
} else if (item.type === 'function_call_output') {
|
|
241
|
+
return {
|
|
242
|
+
type: 'function_call_output',
|
|
243
|
+
call_id: item.callId,
|
|
244
|
+
output: item.output,
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
throw new Error(`Unsupported item type: ${item['type']}`);
|
|
249
|
+
}
|
|
@@ -56,12 +56,14 @@ class ChatItemGroup {
|
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
removeInvalidToolCalls() {
|
|
59
|
-
if (this.toolCalls.length === this.toolOutputs.length) {
|
|
60
|
-
return;
|
|
61
|
-
}
|
|
62
|
-
|
|
63
59
|
const toolCallIds = new Set(this.toolCalls.map((call) => call.callId));
|
|
64
60
|
const toolOutputIds = new Set(this.toolOutputs.map((output) => output.callId));
|
|
61
|
+
const sameIds =
|
|
62
|
+
toolCallIds.size === toolOutputIds.size &&
|
|
63
|
+
[...toolCallIds].every((id) => toolOutputIds.has(id));
|
|
64
|
+
if (this.toolCalls.length === this.toolOutputs.length && sameIds) {
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
65
67
|
|
|
66
68
|
// intersection of tool call ids and tool output ids
|
|
67
69
|
const validCallIds = intersection(toolCallIds, toolOutputIds);
|
package/src/llm/realtime.ts
CHANGED
package/src/log.ts
CHANGED
|
@@ -44,7 +44,7 @@ export const log = () => {
|
|
|
44
44
|
export const initializeLogger = ({ pretty, level }: LoggerOptions) => {
|
|
45
45
|
globals[LOGGER_OPTIONS_KEY] = { pretty, level };
|
|
46
46
|
globals[LOGGER_KEY] = pino(
|
|
47
|
-
{ level: level || 'info' },
|
|
47
|
+
{ level: level || 'info', serializers: { error: pino.stdSerializers.err } },
|
|
48
48
|
pretty ? pinoPretty({ colorize: true }) : process.stdout,
|
|
49
49
|
);
|
|
50
50
|
};
|
|
@@ -90,5 +90,8 @@ export const enableOtelLogging = () => {
|
|
|
90
90
|
{ stream: new OtelDestination(), level: 'debug' },
|
|
91
91
|
];
|
|
92
92
|
|
|
93
|
-
globals[LOGGER_KEY] = pino({ level: logLevel }, multistream(streams));
|
|
93
|
+
globals[LOGGER_KEY] = pino(
|
|
94
|
+
{ level: logLevel, serializers: { error: pino.stdSerializers.err } },
|
|
95
|
+
multistream(streams),
|
|
96
|
+
);
|
|
94
97
|
};
|
|
@@ -59,16 +59,17 @@ export class DeferredReadableStream<T> {
|
|
|
59
59
|
throw new Error('Stream source already set');
|
|
60
60
|
}
|
|
61
61
|
|
|
62
|
-
this.sourceReader = source.getReader();
|
|
63
|
-
this.pump();
|
|
62
|
+
const sourceReader = source.getReader();
|
|
63
|
+
this.sourceReader = sourceReader;
|
|
64
|
+
void this.pump(sourceReader);
|
|
64
65
|
}
|
|
65
66
|
|
|
66
|
-
private async pump() {
|
|
67
|
+
private async pump(sourceReader: ReadableStreamDefaultReader<T>) {
|
|
67
68
|
let sourceError: unknown;
|
|
68
69
|
|
|
69
70
|
try {
|
|
70
71
|
while (true) {
|
|
71
|
-
const { done, value } = await this.sourceReader!.read();
|
|
72
|
+
const { done, value } = await sourceReader.read();
|
|
72
73
|
if (done) break;
|
|
73
74
|
await this.writer.write(value);
|
|
74
75
|
}
|
|
@@ -81,7 +82,7 @@ export class DeferredReadableStream<T> {
|
|
|
81
82
|
// any other error from source will be propagated to the consumer
|
|
82
83
|
if (sourceError) {
|
|
83
84
|
try {
|
|
84
|
-
this.writer.abort(sourceError);
|
|
85
|
+
await this.writer.abort(sourceError);
|
|
85
86
|
} catch (e) {
|
|
86
87
|
// ignore if writer is already closed
|
|
87
88
|
}
|
|
@@ -118,10 +119,20 @@ export class DeferredReadableStream<T> {
|
|
|
118
119
|
return;
|
|
119
120
|
}
|
|
120
121
|
|
|
122
|
+
const sourceReader = this.sourceReader!;
|
|
123
|
+
// Clear source first so future setSource() calls can reattach cleanly.
|
|
124
|
+
this.sourceReader = undefined;
|
|
125
|
+
|
|
121
126
|
// release lock will make any pending read() throw TypeError
|
|
122
127
|
// which are expected, and we intentionally catch those error
|
|
123
128
|
// using isStreamReaderReleaseError
|
|
124
129
|
// this will unblock any pending read() inside the async for loop
|
|
125
|
-
this.sourceReader!.releaseLock();
|
|
130
|
+
try {
|
|
131
|
+
sourceReader.releaseLock();
|
|
132
|
+
} catch (e) {
|
|
133
|
+
if (!isStreamReaderReleaseError(e)) {
|
|
134
|
+
throw e;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
126
137
|
}
|
|
127
138
|
}
|
package/src/stream/index.ts
CHANGED
|
@@ -4,4 +4,5 @@
|
|
|
4
4
|
export { DeferredReadableStream } from './deferred_stream.js';
|
|
5
5
|
export { IdentityTransform } from './identity_transform.js';
|
|
6
6
|
export { mergeReadableStreams } from './merge_readable_streams.js';
|
|
7
|
+
export { MultiInputStream } from './multi_input_stream.js';
|
|
7
8
|
export { createStreamChannel, type StreamChannel } from './stream_channel.js';
|