@livekit/agents 1.0.45 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/dist/cli.cjs +14 -20
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +14 -20
  5. package/dist/cli.js.map +1 -1
  6. package/dist/ipc/job_proc_lazy_main.cjs +14 -5
  7. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  8. package/dist/ipc/job_proc_lazy_main.js +14 -5
  9. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  10. package/dist/llm/chat_context.cjs +19 -0
  11. package/dist/llm/chat_context.cjs.map +1 -1
  12. package/dist/llm/chat_context.d.cts +4 -0
  13. package/dist/llm/chat_context.d.ts +4 -0
  14. package/dist/llm/chat_context.d.ts.map +1 -1
  15. package/dist/llm/chat_context.js +19 -0
  16. package/dist/llm/chat_context.js.map +1 -1
  17. package/dist/llm/provider_format/index.cjs +2 -0
  18. package/dist/llm/provider_format/index.cjs.map +1 -1
  19. package/dist/llm/provider_format/index.d.cts +1 -1
  20. package/dist/llm/provider_format/index.d.ts +1 -1
  21. package/dist/llm/provider_format/index.d.ts.map +1 -1
  22. package/dist/llm/provider_format/index.js +6 -1
  23. package/dist/llm/provider_format/index.js.map +1 -1
  24. package/dist/llm/provider_format/openai.cjs +82 -2
  25. package/dist/llm/provider_format/openai.cjs.map +1 -1
  26. package/dist/llm/provider_format/openai.d.cts +1 -0
  27. package/dist/llm/provider_format/openai.d.ts +1 -0
  28. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  29. package/dist/llm/provider_format/openai.js +80 -1
  30. package/dist/llm/provider_format/openai.js.map +1 -1
  31. package/dist/llm/provider_format/openai.test.cjs +326 -0
  32. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  33. package/dist/llm/provider_format/openai.test.js +327 -1
  34. package/dist/llm/provider_format/openai.test.js.map +1 -1
  35. package/dist/llm/provider_format/utils.cjs +4 -3
  36. package/dist/llm/provider_format/utils.cjs.map +1 -1
  37. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  38. package/dist/llm/provider_format/utils.js +4 -3
  39. package/dist/llm/provider_format/utils.js.map +1 -1
  40. package/dist/llm/realtime.cjs.map +1 -1
  41. package/dist/llm/realtime.d.cts +1 -0
  42. package/dist/llm/realtime.d.ts +1 -0
  43. package/dist/llm/realtime.d.ts.map +1 -1
  44. package/dist/llm/realtime.js.map +1 -1
  45. package/dist/log.cjs +5 -2
  46. package/dist/log.cjs.map +1 -1
  47. package/dist/log.d.ts.map +1 -1
  48. package/dist/log.js +5 -2
  49. package/dist/log.js.map +1 -1
  50. package/dist/stream/deferred_stream.cjs +15 -6
  51. package/dist/stream/deferred_stream.cjs.map +1 -1
  52. package/dist/stream/deferred_stream.d.ts.map +1 -1
  53. package/dist/stream/deferred_stream.js +15 -6
  54. package/dist/stream/deferred_stream.js.map +1 -1
  55. package/dist/stream/index.cjs +3 -0
  56. package/dist/stream/index.cjs.map +1 -1
  57. package/dist/stream/index.d.cts +1 -0
  58. package/dist/stream/index.d.ts +1 -0
  59. package/dist/stream/index.d.ts.map +1 -1
  60. package/dist/stream/index.js +2 -0
  61. package/dist/stream/index.js.map +1 -1
  62. package/dist/stream/multi_input_stream.cjs +139 -0
  63. package/dist/stream/multi_input_stream.cjs.map +1 -0
  64. package/dist/stream/multi_input_stream.d.cts +55 -0
  65. package/dist/stream/multi_input_stream.d.ts +55 -0
  66. package/dist/stream/multi_input_stream.d.ts.map +1 -0
  67. package/dist/stream/multi_input_stream.js +115 -0
  68. package/dist/stream/multi_input_stream.js.map +1 -0
  69. package/dist/stream/multi_input_stream.test.cjs +340 -0
  70. package/dist/stream/multi_input_stream.test.cjs.map +1 -0
  71. package/dist/stream/multi_input_stream.test.js +339 -0
  72. package/dist/stream/multi_input_stream.test.js.map +1 -0
  73. package/dist/telemetry/trace_types.cjs +42 -0
  74. package/dist/telemetry/trace_types.cjs.map +1 -1
  75. package/dist/telemetry/trace_types.d.cts +14 -0
  76. package/dist/telemetry/trace_types.d.ts +14 -0
  77. package/dist/telemetry/trace_types.d.ts.map +1 -1
  78. package/dist/telemetry/trace_types.js +28 -0
  79. package/dist/telemetry/trace_types.js.map +1 -1
  80. package/dist/utils.cjs +44 -2
  81. package/dist/utils.cjs.map +1 -1
  82. package/dist/utils.d.cts +8 -0
  83. package/dist/utils.d.ts +8 -0
  84. package/dist/utils.d.ts.map +1 -1
  85. package/dist/utils.js +44 -2
  86. package/dist/utils.js.map +1 -1
  87. package/dist/utils.test.cjs +71 -0
  88. package/dist/utils.test.cjs.map +1 -1
  89. package/dist/utils.test.js +71 -0
  90. package/dist/utils.test.js.map +1 -1
  91. package/dist/version.cjs +1 -1
  92. package/dist/version.cjs.map +1 -1
  93. package/dist/version.d.cts +1 -1
  94. package/dist/version.d.ts +1 -1
  95. package/dist/version.d.ts.map +1 -1
  96. package/dist/version.js +1 -1
  97. package/dist/version.js.map +1 -1
  98. package/dist/voice/agent.cjs +144 -12
  99. package/dist/voice/agent.cjs.map +1 -1
  100. package/dist/voice/agent.d.cts +29 -4
  101. package/dist/voice/agent.d.ts +29 -4
  102. package/dist/voice/agent.d.ts.map +1 -1
  103. package/dist/voice/agent.js +140 -11
  104. package/dist/voice/agent.js.map +1 -1
  105. package/dist/voice/agent.test.cjs +120 -0
  106. package/dist/voice/agent.test.cjs.map +1 -1
  107. package/dist/voice/agent.test.js +122 -2
  108. package/dist/voice/agent.test.js.map +1 -1
  109. package/dist/voice/agent_activity.cjs +402 -292
  110. package/dist/voice/agent_activity.cjs.map +1 -1
  111. package/dist/voice/agent_activity.d.cts +35 -7
  112. package/dist/voice/agent_activity.d.ts +35 -7
  113. package/dist/voice/agent_activity.d.ts.map +1 -1
  114. package/dist/voice/agent_activity.js +402 -287
  115. package/dist/voice/agent_activity.js.map +1 -1
  116. package/dist/voice/agent_session.cjs +156 -44
  117. package/dist/voice/agent_session.cjs.map +1 -1
  118. package/dist/voice/agent_session.d.cts +22 -9
  119. package/dist/voice/agent_session.d.ts +22 -9
  120. package/dist/voice/agent_session.d.ts.map +1 -1
  121. package/dist/voice/agent_session.js +156 -44
  122. package/dist/voice/agent_session.js.map +1 -1
  123. package/dist/voice/audio_recognition.cjs +89 -36
  124. package/dist/voice/audio_recognition.cjs.map +1 -1
  125. package/dist/voice/audio_recognition.d.cts +22 -1
  126. package/dist/voice/audio_recognition.d.ts +22 -1
  127. package/dist/voice/audio_recognition.d.ts.map +1 -1
  128. package/dist/voice/audio_recognition.js +93 -36
  129. package/dist/voice/audio_recognition.js.map +1 -1
  130. package/dist/voice/audio_recognition_span.test.cjs +233 -0
  131. package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
  132. package/dist/voice/audio_recognition_span.test.js +232 -0
  133. package/dist/voice/audio_recognition_span.test.js.map +1 -0
  134. package/dist/voice/generation.cjs +39 -19
  135. package/dist/voice/generation.cjs.map +1 -1
  136. package/dist/voice/generation.d.ts.map +1 -1
  137. package/dist/voice/generation.js +44 -20
  138. package/dist/voice/generation.js.map +1 -1
  139. package/dist/voice/index.cjs +2 -0
  140. package/dist/voice/index.cjs.map +1 -1
  141. package/dist/voice/index.d.cts +1 -1
  142. package/dist/voice/index.d.ts +1 -1
  143. package/dist/voice/index.d.ts.map +1 -1
  144. package/dist/voice/index.js +2 -1
  145. package/dist/voice/index.js.map +1 -1
  146. package/dist/voice/io.cjs +6 -3
  147. package/dist/voice/io.cjs.map +1 -1
  148. package/dist/voice/io.d.cts +3 -2
  149. package/dist/voice/io.d.ts +3 -2
  150. package/dist/voice/io.d.ts.map +1 -1
  151. package/dist/voice/io.js +6 -3
  152. package/dist/voice/io.js.map +1 -1
  153. package/dist/voice/recorder_io/recorder_io.cjs +3 -1
  154. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  155. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  156. package/dist/voice/recorder_io/recorder_io.js +3 -1
  157. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  158. package/dist/voice/room_io/_input.cjs +17 -17
  159. package/dist/voice/room_io/_input.cjs.map +1 -1
  160. package/dist/voice/room_io/_input.d.cts +2 -2
  161. package/dist/voice/room_io/_input.d.ts +2 -2
  162. package/dist/voice/room_io/_input.d.ts.map +1 -1
  163. package/dist/voice/room_io/_input.js +7 -6
  164. package/dist/voice/room_io/_input.js.map +1 -1
  165. package/dist/voice/room_io/room_io.cjs +9 -0
  166. package/dist/voice/room_io/room_io.cjs.map +1 -1
  167. package/dist/voice/room_io/room_io.d.cts +3 -1
  168. package/dist/voice/room_io/room_io.d.ts +3 -1
  169. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  170. package/dist/voice/room_io/room_io.js +9 -0
  171. package/dist/voice/room_io/room_io.js.map +1 -1
  172. package/dist/voice/speech_handle.cjs +7 -1
  173. package/dist/voice/speech_handle.cjs.map +1 -1
  174. package/dist/voice/speech_handle.d.cts +2 -0
  175. package/dist/voice/speech_handle.d.ts +2 -0
  176. package/dist/voice/speech_handle.d.ts.map +1 -1
  177. package/dist/voice/speech_handle.js +8 -2
  178. package/dist/voice/speech_handle.js.map +1 -1
  179. package/dist/voice/testing/run_result.cjs +66 -15
  180. package/dist/voice/testing/run_result.cjs.map +1 -1
  181. package/dist/voice/testing/run_result.d.cts +14 -3
  182. package/dist/voice/testing/run_result.d.ts +14 -3
  183. package/dist/voice/testing/run_result.d.ts.map +1 -1
  184. package/dist/voice/testing/run_result.js +66 -15
  185. package/dist/voice/testing/run_result.js.map +1 -1
  186. package/dist/voice/utils.cjs +47 -0
  187. package/dist/voice/utils.cjs.map +1 -0
  188. package/dist/voice/utils.d.cts +4 -0
  189. package/dist/voice/utils.d.ts +4 -0
  190. package/dist/voice/utils.d.ts.map +1 -0
  191. package/dist/voice/utils.js +23 -0
  192. package/dist/voice/utils.js.map +1 -0
  193. package/package.json +1 -1
  194. package/src/cli.ts +20 -33
  195. package/src/ipc/job_proc_lazy_main.ts +16 -5
  196. package/src/llm/chat_context.ts +35 -0
  197. package/src/llm/provider_format/index.ts +7 -2
  198. package/src/llm/provider_format/openai.test.ts +385 -1
  199. package/src/llm/provider_format/openai.ts +103 -0
  200. package/src/llm/provider_format/utils.ts +6 -4
  201. package/src/llm/realtime.ts +1 -0
  202. package/src/log.ts +5 -2
  203. package/src/stream/deferred_stream.ts +17 -6
  204. package/src/stream/index.ts +1 -0
  205. package/src/stream/multi_input_stream.test.ts +540 -0
  206. package/src/stream/multi_input_stream.ts +172 -0
  207. package/src/telemetry/trace_types.ts +18 -0
  208. package/src/utils.test.ts +87 -0
  209. package/src/utils.ts +52 -2
  210. package/src/version.ts +1 -1
  211. package/src/voice/agent.test.ts +140 -2
  212. package/src/voice/agent.ts +189 -10
  213. package/src/voice/agent_activity.ts +449 -286
  214. package/src/voice/agent_session.ts +195 -51
  215. package/src/voice/audio_recognition.ts +118 -38
  216. package/src/voice/audio_recognition_span.test.ts +261 -0
  217. package/src/voice/generation.ts +52 -23
  218. package/src/voice/index.ts +1 -1
  219. package/src/voice/io.ts +7 -4
  220. package/src/voice/recorder_io/recorder_io.ts +2 -1
  221. package/src/voice/room_io/_input.ts +11 -7
  222. package/src/voice/room_io/room_io.ts +12 -0
  223. package/src/voice/speech_handle.ts +9 -2
  224. package/src/voice/testing/run_result.ts +81 -23
  225. package/src/voice/utils.ts +29 -0
package/src/llm/provider_format/openai.test.ts CHANGED
@@ -11,7 +11,7 @@ import {
11
11
  FunctionCallOutput,
12
12
  } from '../chat_context.js';
13
13
  import { serializeImage } from '../utils.js';
14
- import { toChatCtx } from './openai.js';
14
+ import { toChatCtx, toResponsesChatCtx } from './openai.js';
15
15
 
16
16
  // Mock the serializeImage function
17
17
  vi.mock('../utils.js', () => ({
@@ -673,3 +673,387 @@ describe('toChatCtx', () => {
673
673
  ]);
674
674
  });
675
675
  });
676
+
677
+ describe('toResponsesChatCtx', () => {
678
+ const serializeImageMock = vi.mocked(serializeImage);
679
+
680
+ initializeLogger({ level: 'silent', pretty: false });
681
+
682
+ beforeEach(async () => {
683
+ vi.clearAllMocks();
684
+ });
685
+
686
+ it('should convert simple text messages', async () => {
687
+ const ctx = ChatContext.empty();
688
+ ctx.addMessage({ role: 'user', content: 'Hello' });
689
+ ctx.addMessage({ role: 'assistant', content: 'Hi there!' });
690
+
691
+ const result = await toResponsesChatCtx(ctx);
692
+
693
+ expect(result).toHaveLength(2);
694
+ expect(result[0]).toEqual({ role: 'user', content: 'Hello' });
695
+ expect(result[1]).toEqual({ role: 'assistant', content: 'Hi there!' });
696
+ });
697
+
698
+ it('should handle system messages', async () => {
699
+ const ctx = ChatContext.empty();
700
+ ctx.addMessage({ role: 'system', content: 'You are a helpful assistant' });
701
+ ctx.addMessage({ role: 'user', content: 'Hello' });
702
+
703
+ const result = await toResponsesChatCtx(ctx);
704
+
705
+ expect(result).toHaveLength(2);
706
+ expect(result[0]).toEqual({ role: 'system', content: 'You are a helpful assistant' });
707
+ expect(result[1]).toEqual({ role: 'user', content: 'Hello' });
708
+ });
709
+
710
+ it('should handle multi-line text content', async () => {
711
+ const ctx = ChatContext.empty();
712
+ ctx.addMessage({ role: 'user', content: ['Line 1', 'Line 2', 'Line 3'] });
713
+
714
+ const result = await toResponsesChatCtx(ctx);
715
+
716
+ expect(result).toHaveLength(1);
717
+ expect(result[0]).toEqual({ role: 'user', content: 'Line 1\nLine 2\nLine 3' });
718
+ });
719
+
720
+ it('should convert images to input_image format with external URL', async () => {
721
+ serializeImageMock.mockResolvedValue({
722
+ inferenceDetail: 'high',
723
+ externalUrl: 'https://example.com/image.jpg',
724
+ });
725
+
726
+ const ctx = ChatContext.empty();
727
+ ctx.addMessage({
728
+ role: 'user',
729
+ content: [
730
+ {
731
+ id: 'img1',
732
+ type: 'image_content',
733
+ image: 'https://example.com/image.jpg',
734
+ inferenceDetail: 'high',
735
+ _cache: {},
736
+ },
737
+ ],
738
+ });
739
+
740
+ const result = await toResponsesChatCtx(ctx);
741
+
742
+ expect(result).toEqual([
743
+ {
744
+ role: 'user',
745
+ content: [
746
+ {
747
+ type: 'input_image',
748
+ image_url: 'https://example.com/image.jpg',
749
+ detail: 'high',
750
+ },
751
+ ],
752
+ },
753
+ ]);
754
+ });
755
+
756
+ it('should convert images to input_image format with base64 data', async () => {
757
+ serializeImageMock.mockResolvedValue({
758
+ inferenceDetail: 'auto',
759
+ mimeType: 'image/png',
760
+ base64Data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
761
+ });
762
+
763
+ const ctx = ChatContext.empty();
764
+ ctx.addMessage({
765
+ role: 'user',
766
+ content: [
767
+ {
768
+ id: 'img1',
769
+ type: 'image_content',
770
+ image: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
771
+ inferenceDetail: 'auto',
772
+ _cache: {},
773
+ },
774
+ ],
775
+ });
776
+
777
+ const result = await toResponsesChatCtx(ctx);
778
+
779
+ expect(result).toEqual([
780
+ {
781
+ role: 'user',
782
+ content: [
783
+ {
784
+ type: 'input_image',
785
+ image_url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
786
+ detail: 'auto',
787
+ },
788
+ ],
789
+ },
790
+ ]);
791
+ });
792
+
793
+ it('should handle mixed content with text and image using input_text', async () => {
794
+ serializeImageMock.mockResolvedValue({
795
+ inferenceDetail: 'high',
796
+ externalUrl: 'https://example.com/image.jpg',
797
+ });
798
+
799
+ const ctx = ChatContext.empty();
800
+ ctx.addMessage({
801
+ role: 'user',
802
+ content: [
803
+ 'Check this out:',
804
+ {
805
+ id: 'img1',
806
+ type: 'image_content',
807
+ image: 'https://example.com/image.jpg',
808
+ inferenceDetail: 'high',
809
+ _cache: {},
810
+ },
811
+ ],
812
+ });
813
+
814
+ const result = await toResponsesChatCtx(ctx);
815
+
816
+ expect(result).toEqual([
817
+ {
818
+ role: 'user',
819
+ content: [
820
+ {
821
+ type: 'input_image',
822
+ image_url: 'https://example.com/image.jpg',
823
+ detail: 'high',
824
+ },
825
+ { type: 'input_text', text: 'Check this out:' },
826
+ ],
827
+ },
828
+ ]);
829
+ });
830
+
831
+ it('should handle tool calls as top-level function_call items', async () => {
832
+ const ctx = ChatContext.empty();
833
+
834
+ const msg = ctx.addMessage({ role: 'assistant', content: 'Let me help you.' });
835
+ const toolCall = FunctionCall.create({
836
+ id: msg.id + '/tool_1',
837
+ callId: 'call_123',
838
+ name: 'get_weather',
839
+ args: '{"location": "Paris"}',
840
+ });
841
+ const toolOutput = FunctionCallOutput.create({
842
+ callId: 'call_123',
843
+ output: '{"temperature": 20}',
844
+ isError: false,
845
+ });
846
+
847
+ ctx.insert([toolCall, toolOutput]);
848
+
849
+ const result = await toResponsesChatCtx(ctx);
850
+
851
+ expect(result).toEqual([
852
+ { role: 'assistant', content: 'Let me help you.' },
853
+ {
854
+ type: 'function_call',
855
+ call_id: 'call_123',
856
+ name: 'get_weather',
857
+ arguments: '{"location": "Paris"}',
858
+ },
859
+ {
860
+ type: 'function_call_output',
861
+ call_id: 'call_123',
862
+ output: '{"temperature": 20}',
863
+ },
864
+ ]);
865
+ });
866
+
867
+ it('should handle tool calls without an accompanying message', async () => {
868
+ const ctx = ChatContext.empty();
869
+
870
+ const toolCall = new FunctionCall({
871
+ id: 'func_1',
872
+ callId: 'call_456',
873
+ name: 'calculate',
874
+ args: '{"a": 5, "b": 3}',
875
+ });
876
+ const toolOutput = new FunctionCallOutput({
877
+ callId: 'call_456',
878
+ output: '{"result": 8}',
879
+ isError: false,
880
+ });
881
+
882
+ ctx.insert([toolCall, toolOutput]);
883
+
884
+ const result = await toResponsesChatCtx(ctx);
885
+
886
+ expect(result).toEqual([
887
+ {
888
+ type: 'function_call',
889
+ call_id: 'call_456',
890
+ name: 'calculate',
891
+ arguments: '{"a": 5, "b": 3}',
892
+ },
893
+ {
894
+ type: 'function_call_output',
895
+ call_id: 'call_456',
896
+ output: '{"result": 8}',
897
+ },
898
+ ]);
899
+ });
900
+
901
+ it('should handle multiple tool calls as separate function_call items', async () => {
902
+ const ctx = ChatContext.empty();
903
+
904
+ const msg = ctx.addMessage({ role: 'assistant', content: "I'll check both." });
905
+ const toolCall1 = new FunctionCall({
906
+ id: msg.id + '/tool_1',
907
+ callId: 'call_1',
908
+ name: 'get_weather',
909
+ args: '{"location": "NYC"}',
910
+ });
911
+ const toolCall2 = new FunctionCall({
912
+ id: msg.id + '/tool_2',
913
+ callId: 'call_2',
914
+ name: 'get_weather',
915
+ args: '{"location": "LA"}',
916
+ });
917
+ const toolOutput1 = new FunctionCallOutput({
918
+ callId: 'call_1',
919
+ output: '{"temperature": 65}',
920
+ isError: false,
921
+ });
922
+ const toolOutput2 = new FunctionCallOutput({
923
+ callId: 'call_2',
924
+ output: '{"temperature": 78}',
925
+ isError: false,
926
+ });
927
+
928
+ ctx.insert([toolCall1, toolCall2, toolOutput1, toolOutput2]);
929
+
930
+ const result = await toResponsesChatCtx(ctx);
931
+
932
+ expect(result).toEqual([
933
+ { role: 'assistant', content: "I'll check both." },
934
+ {
935
+ type: 'function_call',
936
+ call_id: 'call_1',
937
+ name: 'get_weather',
938
+ arguments: '{"location": "NYC"}',
939
+ },
940
+ {
941
+ type: 'function_call',
942
+ call_id: 'call_2',
943
+ name: 'get_weather',
944
+ arguments: '{"location": "LA"}',
945
+ },
946
+ {
947
+ type: 'function_call_output',
948
+ call_id: 'call_1',
949
+ output: '{"temperature": 65}',
950
+ },
951
+ {
952
+ type: 'function_call_output',
953
+ call_id: 'call_2',
954
+ output: '{"temperature": 78}',
955
+ },
956
+ ]);
957
+ });
958
+
959
+ it('should skip empty groups', async () => {
960
+ const ctx = ChatContext.empty();
961
+ ctx.addMessage({ role: 'user', content: 'Hello', createdAt: 1000 });
962
+
963
+ const orphanOutput = new FunctionCallOutput({
964
+ callId: 'orphan_call',
965
+ output: 'This should be ignored',
966
+ isError: false,
967
+ createdAt: 2000,
968
+ });
969
+ ctx.insert(orphanOutput);
970
+
971
+ ctx.addMessage({ role: 'assistant', content: 'Hi!', createdAt: 3000 });
972
+
973
+ const result = await toResponsesChatCtx(ctx);
974
+
975
+ expect(result).toHaveLength(2);
976
+ expect(result).toContainEqual({ role: 'user', content: 'Hello' });
977
+ expect(result).toContainEqual({ role: 'assistant', content: 'Hi!' });
978
+ });
979
+
980
+ it('should filter out agent handoff items', async () => {
981
+ const ctx = ChatContext.empty();
982
+
983
+ ctx.addMessage({ role: 'user', content: 'Hello' });
984
+ ctx.insert(new AgentHandoffItem({ oldAgentId: 'agent_1', newAgentId: 'agent_2' }));
985
+ ctx.addMessage({ role: 'assistant', content: 'Hi there!' });
986
+
987
+ const result = await toResponsesChatCtx(ctx);
988
+
989
+ expect(result).toEqual([
990
+ { role: 'user', content: 'Hello' },
991
+ { role: 'assistant', content: 'Hi there!' },
992
+ ]);
993
+ });
994
+
995
+ it('should cache serialized images', async () => {
996
+ serializeImageMock.mockResolvedValue({
997
+ inferenceDetail: 'high',
998
+ mimeType: 'image/png',
999
+ base64Data: 'cached-data',
1000
+ });
1001
+
1002
+ const imageContent = {
1003
+ id: 'img1',
1004
+ type: 'image_content' as const,
1005
+ image: 'https://example.com/image.jpg',
1006
+ inferenceDetail: 'high' as const,
1007
+ _cache: {},
1008
+ };
1009
+
1010
+ const ctx = ChatContext.empty();
1011
+ ctx.addMessage({ role: 'user', content: [imageContent] });
1012
+
1013
+ await toResponsesChatCtx(ctx);
1014
+ await toResponsesChatCtx(ctx);
1015
+
1016
+ expect(serializeImageMock).toHaveBeenCalledTimes(1);
1017
+ expect(imageContent._cache).toHaveProperty('serialized_image');
1018
+ });
1019
+
1020
+ it('should throw error for unsupported content type', async () => {
1021
+ const ctx = ChatContext.empty();
1022
+ ctx.addMessage({
1023
+ role: 'user',
1024
+ content: [
1025
+ {
1026
+ type: 'audio_content',
1027
+ frame: [],
1028
+ },
1029
+ ],
1030
+ });
1031
+
1032
+ await expect(toResponsesChatCtx(ctx)).rejects.toThrow(
1033
+ 'Unsupported content type: audio_content',
1034
+ );
1035
+ });
1036
+
1037
+ it('should throw error when serialized image has no data', async () => {
1038
+ serializeImageMock.mockResolvedValue({
1039
+ inferenceDetail: 'high',
1040
+ // No base64Data or externalUrl
1041
+ });
1042
+
1043
+ const ctx = ChatContext.empty();
1044
+ ctx.addMessage({
1045
+ role: 'user',
1046
+ content: [
1047
+ {
1048
+ id: 'img1',
1049
+ type: 'image_content',
1050
+ image: 'invalid-image',
1051
+ inferenceDetail: 'high',
1052
+ _cache: {},
1053
+ },
1054
+ ],
1055
+ });
1056
+
1057
+ await expect(toResponsesChatCtx(ctx)).rejects.toThrow('Serialized image has no data bytes');
1058
+ });
1059
+ });
package/src/llm/provider_format/openai.ts CHANGED
@@ -144,3 +144,106 @@ async function toImageContent(content: ImageContent) {
144
144
  },
145
145
  };
146
146
  }
147
+
148
+ async function toResponsesImageContent(content: ImageContent) {
149
+ const cacheKey = 'serialized_image';
150
+ let serialized: SerializedImage;
151
+
152
+ if (content._cache[cacheKey] === undefined) {
153
+ serialized = await serializeImage(content);
154
+ content._cache[cacheKey] = serialized;
155
+ }
156
+ serialized = content._cache[cacheKey];
157
+
158
+ if (serialized.externalUrl) {
159
+ return {
160
+ type: 'input_image' as const,
161
+ image_url: serialized.externalUrl,
162
+ detail: serialized.inferenceDetail,
163
+ };
164
+ }
165
+
166
+ if (serialized.base64Data === undefined) {
167
+ throw new Error('Serialized image has no data bytes');
168
+ }
169
+
170
+ return {
171
+ type: 'input_image' as const,
172
+ image_url: `data:${serialized.mimeType};base64,${serialized.base64Data}`,
173
+ detail: serialized.inferenceDetail,
174
+ };
175
+ }
176
+
177
+ export async function toResponsesChatCtx(
178
+ chatCtx: ChatContext,
179
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
180
+ injectDummyUserMessage: boolean = true,
181
+ ) {
182
+ const itemGroups = groupToolCalls(chatCtx);
183
+ const messages: Record<string, any>[] = []; // eslint-disable-line @typescript-eslint/no-explicit-any
184
+
185
+ for (const group of itemGroups) {
186
+ if (group.isEmpty) continue;
187
+
188
+ if (group.message) {
189
+ messages.push(await toResponsesChatItem(group.message));
190
+ }
191
+
192
+ for (const toolCall of group.toolCalls) {
193
+ messages.push({
194
+ type: 'function_call',
195
+ call_id: toolCall.callId,
196
+ name: toolCall.name,
197
+ arguments: toolCall.args,
198
+ });
199
+ }
200
+
201
+ for (const toolOutput of group.toolOutputs) {
202
+ messages.push(await toResponsesChatItem(toolOutput));
203
+ }
204
+ }
205
+
206
+ return messages;
207
+ }
208
+
209
+ async function toResponsesChatItem(item: ChatItem) {
210
+ if (item.type === 'message') {
211
+ const listContent: Record<string, any>[] = []; // eslint-disable-line @typescript-eslint/no-explicit-any
212
+ let textContent = '';
213
+
214
+ for (const content of item.content) {
215
+ if (typeof content === 'string') {
216
+ if (textContent) textContent += '\n';
217
+ textContent += content;
218
+ } else if (content.type === 'image_content') {
219
+ listContent.push(await toResponsesImageContent(content));
220
+ } else {
221
+ throw new Error(`Unsupported content type: ${content.type}`);
222
+ }
223
+ }
224
+
225
+ const content =
226
+ listContent.length == 0
227
+ ? textContent
228
+ : textContent.length == 0
229
+ ? listContent
230
+ : [...listContent, { type: 'input_text', text: textContent }];
231
+
232
+ return { role: item.role, content };
233
+ } else if (item.type === 'function_call') {
234
+ return {
235
+ type: 'function_call',
236
+ call_id: item.callId,
237
+ name: item.name,
238
+ arguments: item.args,
239
+ };
240
+ } else if (item.type === 'function_call_output') {
241
+ return {
242
+ type: 'function_call_output',
243
+ call_id: item.callId,
244
+ output: item.output,
245
+ };
246
+ }
247
+
248
+ throw new Error(`Unsupported item type: ${item['type']}`);
249
+ }
package/src/llm/provider_format/utils.ts CHANGED
@@ -56,12 +56,14 @@ class ChatItemGroup {
56
56
  }
57
57
 
58
58
  removeInvalidToolCalls() {
59
- if (this.toolCalls.length === this.toolOutputs.length) {
60
- return;
61
- }
62
-
63
59
  const toolCallIds = new Set(this.toolCalls.map((call) => call.callId));
64
60
  const toolOutputIds = new Set(this.toolOutputs.map((output) => output.callId));
61
+ const sameIds =
62
+ toolCallIds.size === toolOutputIds.size &&
63
+ [...toolCallIds].every((id) => toolOutputIds.has(id));
64
+ if (this.toolCalls.length === this.toolOutputs.length && sameIds) {
65
+ return;
66
+ }
65
67
 
66
68
  // intersection of tool call ids and tool output ids
67
69
  const validCallIds = intersection(toolCallIds, toolOutputIds);
package/src/llm/realtime.ts CHANGED
@@ -48,6 +48,7 @@ export interface RealtimeCapabilities {
48
48
  userTranscription: boolean;
49
49
  autoToolReplyGeneration: boolean;
50
50
  audioOutput: boolean;
51
+ manualFunctionCalls: boolean;
51
52
  }
52
53
 
53
54
  export interface InputTranscriptionCompleted {
package/src/log.ts CHANGED
@@ -44,7 +44,7 @@ export const log = () => {
44
44
  export const initializeLogger = ({ pretty, level }: LoggerOptions) => {
45
45
  globals[LOGGER_OPTIONS_KEY] = { pretty, level };
46
46
  globals[LOGGER_KEY] = pino(
47
- { level: level || 'info' },
47
+ { level: level || 'info', serializers: { error: pino.stdSerializers.err } },
48
48
  pretty ? pinoPretty({ colorize: true }) : process.stdout,
49
49
  );
50
50
  };
@@ -90,5 +90,8 @@ export const enableOtelLogging = () => {
90
90
  { stream: new OtelDestination(), level: 'debug' },
91
91
  ];
92
92
 
93
- globals[LOGGER_KEY] = pino({ level: logLevel }, multistream(streams));
93
+ globals[LOGGER_KEY] = pino(
94
+ { level: logLevel, serializers: { error: pino.stdSerializers.err } },
95
+ multistream(streams),
96
+ );
94
97
  };
package/src/stream/deferred_stream.ts CHANGED
@@ -59,16 +59,17 @@ export class DeferredReadableStream<T> {
59
59
  throw new Error('Stream source already set');
60
60
  }
61
61
 
62
- this.sourceReader = source.getReader();
63
- this.pump();
62
+ const sourceReader = source.getReader();
63
+ this.sourceReader = sourceReader;
64
+ void this.pump(sourceReader);
64
65
  }
65
66
 
66
- private async pump() {
67
+ private async pump(sourceReader: ReadableStreamDefaultReader<T>) {
67
68
  let sourceError: unknown;
68
69
 
69
70
  try {
70
71
  while (true) {
71
- const { done, value } = await this.sourceReader!.read();
72
+ const { done, value } = await sourceReader.read();
72
73
  if (done) break;
73
74
  await this.writer.write(value);
74
75
  }
@@ -81,7 +82,7 @@ export class DeferredReadableStream<T> {
81
82
  // any other error from source will be propagated to the consumer
82
83
  if (sourceError) {
83
84
  try {
84
- this.writer.abort(sourceError);
85
+ await this.writer.abort(sourceError);
85
86
  } catch (e) {
86
87
  // ignore if writer is already closed
87
88
  }
@@ -118,10 +119,20 @@ export class DeferredReadableStream<T> {
118
119
  return;
119
120
  }
120
121
 
122
+ const sourceReader = this.sourceReader!;
123
+ // Clear source first so future setSource() calls can reattach cleanly.
124
+ this.sourceReader = undefined;
125
+
121
126
  // release lock will make any pending read() throw TypeError
122
127
  // which are expected, and we intentionally catch those error
123
128
  // using isStreamReaderReleaseError
124
129
  // this will unblock any pending read() inside the async for loop
125
- this.sourceReader!.releaseLock();
130
+ try {
131
+ sourceReader.releaseLock();
132
+ } catch (e) {
133
+ if (!isStreamReaderReleaseError(e)) {
134
+ throw e;
135
+ }
136
+ }
126
137
  }
127
138
  }
package/src/stream/index.ts CHANGED
@@ -4,4 +4,5 @@
4
4
  export { DeferredReadableStream } from './deferred_stream.js';
5
5
  export { IdentityTransform } from './identity_transform.js';
6
6
  export { mergeReadableStreams } from './merge_readable_streams.js';
7
+ export { MultiInputStream } from './multi_input_stream.js';
7
8
  export { createStreamChannel, type StreamChannel } from './stream_channel.js';