@livekit/agents 1.0.46 → 1.0.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228)
  1. package/dist/beta/index.cjs +29 -0
  2. package/dist/beta/index.cjs.map +1 -0
  3. package/dist/beta/index.d.cts +2 -0
  4. package/dist/beta/index.d.ts +2 -0
  5. package/dist/beta/index.d.ts.map +1 -0
  6. package/dist/beta/index.js +7 -0
  7. package/dist/beta/index.js.map +1 -0
  8. package/dist/beta/workflows/index.cjs +29 -0
  9. package/dist/beta/workflows/index.cjs.map +1 -0
  10. package/dist/beta/workflows/index.d.cts +2 -0
  11. package/dist/beta/workflows/index.d.ts +2 -0
  12. package/dist/beta/workflows/index.d.ts.map +1 -0
  13. package/dist/beta/workflows/index.js +7 -0
  14. package/dist/beta/workflows/index.js.map +1 -0
  15. package/dist/beta/workflows/task_group.cjs +162 -0
  16. package/dist/beta/workflows/task_group.cjs.map +1 -0
  17. package/dist/beta/workflows/task_group.d.cts +32 -0
  18. package/dist/beta/workflows/task_group.d.ts +32 -0
  19. package/dist/beta/workflows/task_group.d.ts.map +1 -0
  20. package/dist/beta/workflows/task_group.js +138 -0
  21. package/dist/beta/workflows/task_group.js.map +1 -0
  22. package/dist/cli.cjs +14 -20
  23. package/dist/cli.cjs.map +1 -1
  24. package/dist/cli.d.ts.map +1 -1
  25. package/dist/cli.js +14 -20
  26. package/dist/cli.js.map +1 -1
  27. package/dist/index.cjs +3 -0
  28. package/dist/index.cjs.map +1 -1
  29. package/dist/index.d.cts +2 -1
  30. package/dist/index.d.ts +2 -1
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +2 -0
  33. package/dist/index.js.map +1 -1
  34. package/dist/inference/api_protos.d.cts +59 -59
  35. package/dist/inference/api_protos.d.ts +59 -59
  36. package/dist/ipc/job_proc_lazy_main.cjs +14 -5
  37. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  38. package/dist/ipc/job_proc_lazy_main.js +14 -5
  39. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  40. package/dist/llm/chat_context.cjs +108 -1
  41. package/dist/llm/chat_context.cjs.map +1 -1
  42. package/dist/llm/chat_context.d.cts +14 -1
  43. package/dist/llm/chat_context.d.ts +14 -1
  44. package/dist/llm/chat_context.d.ts.map +1 -1
  45. package/dist/llm/chat_context.js +108 -1
  46. package/dist/llm/chat_context.js.map +1 -1
  47. package/dist/llm/chat_context.test.cjs +43 -0
  48. package/dist/llm/chat_context.test.cjs.map +1 -1
  49. package/dist/llm/chat_context.test.js +43 -0
  50. package/dist/llm/chat_context.test.js.map +1 -1
  51. package/dist/llm/index.cjs +2 -0
  52. package/dist/llm/index.cjs.map +1 -1
  53. package/dist/llm/index.d.cts +1 -1
  54. package/dist/llm/index.d.ts +1 -1
  55. package/dist/llm/index.d.ts.map +1 -1
  56. package/dist/llm/index.js +3 -1
  57. package/dist/llm/index.js.map +1 -1
  58. package/dist/llm/provider_format/index.cjs +2 -0
  59. package/dist/llm/provider_format/index.cjs.map +1 -1
  60. package/dist/llm/provider_format/index.d.cts +2 -2
  61. package/dist/llm/provider_format/index.d.ts +2 -2
  62. package/dist/llm/provider_format/index.d.ts.map +1 -1
  63. package/dist/llm/provider_format/index.js +6 -1
  64. package/dist/llm/provider_format/index.js.map +1 -1
  65. package/dist/llm/provider_format/openai.cjs +82 -2
  66. package/dist/llm/provider_format/openai.cjs.map +1 -1
  67. package/dist/llm/provider_format/openai.d.cts +1 -0
  68. package/dist/llm/provider_format/openai.d.ts +1 -0
  69. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  70. package/dist/llm/provider_format/openai.js +80 -1
  71. package/dist/llm/provider_format/openai.js.map +1 -1
  72. package/dist/llm/provider_format/openai.test.cjs +326 -0
  73. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  74. package/dist/llm/provider_format/openai.test.js +327 -1
  75. package/dist/llm/provider_format/openai.test.js.map +1 -1
  76. package/dist/llm/provider_format/utils.cjs +4 -3
  77. package/dist/llm/provider_format/utils.cjs.map +1 -1
  78. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  79. package/dist/llm/provider_format/utils.js +4 -3
  80. package/dist/llm/provider_format/utils.js.map +1 -1
  81. package/dist/llm/realtime.cjs.map +1 -1
  82. package/dist/llm/realtime.d.cts +1 -0
  83. package/dist/llm/realtime.d.ts +1 -0
  84. package/dist/llm/realtime.d.ts.map +1 -1
  85. package/dist/llm/realtime.js.map +1 -1
  86. package/dist/llm/tool_context.cjs +7 -0
  87. package/dist/llm/tool_context.cjs.map +1 -1
  88. package/dist/llm/tool_context.d.cts +10 -2
  89. package/dist/llm/tool_context.d.ts +10 -2
  90. package/dist/llm/tool_context.d.ts.map +1 -1
  91. package/dist/llm/tool_context.js +6 -0
  92. package/dist/llm/tool_context.js.map +1 -1
  93. package/dist/log.cjs +5 -2
  94. package/dist/log.cjs.map +1 -1
  95. package/dist/log.d.ts.map +1 -1
  96. package/dist/log.js +5 -2
  97. package/dist/log.js.map +1 -1
  98. package/dist/stream/deferred_stream.cjs +15 -6
  99. package/dist/stream/deferred_stream.cjs.map +1 -1
  100. package/dist/stream/deferred_stream.d.ts.map +1 -1
  101. package/dist/stream/deferred_stream.js +15 -6
  102. package/dist/stream/deferred_stream.js.map +1 -1
  103. package/dist/utils.cjs +32 -2
  104. package/dist/utils.cjs.map +1 -1
  105. package/dist/utils.d.cts +7 -0
  106. package/dist/utils.d.ts +7 -0
  107. package/dist/utils.d.ts.map +1 -1
  108. package/dist/utils.js +32 -2
  109. package/dist/utils.js.map +1 -1
  110. package/dist/utils.test.cjs +71 -0
  111. package/dist/utils.test.cjs.map +1 -1
  112. package/dist/utils.test.js +71 -0
  113. package/dist/utils.test.js.map +1 -1
  114. package/dist/version.cjs +1 -1
  115. package/dist/version.cjs.map +1 -1
  116. package/dist/version.d.cts +1 -1
  117. package/dist/version.d.ts +1 -1
  118. package/dist/version.d.ts.map +1 -1
  119. package/dist/version.js +1 -1
  120. package/dist/version.js.map +1 -1
  121. package/dist/voice/agent.cjs +153 -12
  122. package/dist/voice/agent.cjs.map +1 -1
  123. package/dist/voice/agent.d.cts +30 -4
  124. package/dist/voice/agent.d.ts +30 -4
  125. package/dist/voice/agent.d.ts.map +1 -1
  126. package/dist/voice/agent.js +149 -11
  127. package/dist/voice/agent.js.map +1 -1
  128. package/dist/voice/agent.test.cjs +120 -0
  129. package/dist/voice/agent.test.cjs.map +1 -1
  130. package/dist/voice/agent.test.js +122 -2
  131. package/dist/voice/agent.test.js.map +1 -1
  132. package/dist/voice/agent_activity.cjs +406 -298
  133. package/dist/voice/agent_activity.cjs.map +1 -1
  134. package/dist/voice/agent_activity.d.cts +41 -7
  135. package/dist/voice/agent_activity.d.ts +41 -7
  136. package/dist/voice/agent_activity.d.ts.map +1 -1
  137. package/dist/voice/agent_activity.js +407 -294
  138. package/dist/voice/agent_activity.js.map +1 -1
  139. package/dist/voice/agent_session.cjs +140 -40
  140. package/dist/voice/agent_session.cjs.map +1 -1
  141. package/dist/voice/agent_session.d.cts +19 -7
  142. package/dist/voice/agent_session.d.ts +19 -7
  143. package/dist/voice/agent_session.d.ts.map +1 -1
  144. package/dist/voice/agent_session.js +137 -37
  145. package/dist/voice/agent_session.js.map +1 -1
  146. package/dist/voice/audio_recognition.cjs +4 -0
  147. package/dist/voice/audio_recognition.cjs.map +1 -1
  148. package/dist/voice/audio_recognition.d.ts.map +1 -1
  149. package/dist/voice/audio_recognition.js +4 -0
  150. package/dist/voice/audio_recognition.js.map +1 -1
  151. package/dist/voice/generation.cjs +39 -19
  152. package/dist/voice/generation.cjs.map +1 -1
  153. package/dist/voice/generation.d.ts.map +1 -1
  154. package/dist/voice/generation.js +44 -20
  155. package/dist/voice/generation.js.map +1 -1
  156. package/dist/voice/index.cjs +2 -0
  157. package/dist/voice/index.cjs.map +1 -1
  158. package/dist/voice/index.d.cts +1 -1
  159. package/dist/voice/index.d.ts +1 -1
  160. package/dist/voice/index.d.ts.map +1 -1
  161. package/dist/voice/index.js +2 -1
  162. package/dist/voice/index.js.map +1 -1
  163. package/dist/voice/room_io/room_io.cjs +11 -2
  164. package/dist/voice/room_io/room_io.cjs.map +1 -1
  165. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  166. package/dist/voice/room_io/room_io.js +12 -3
  167. package/dist/voice/room_io/room_io.js.map +1 -1
  168. package/dist/voice/speech_handle.cjs +7 -1
  169. package/dist/voice/speech_handle.cjs.map +1 -1
  170. package/dist/voice/speech_handle.d.cts +2 -0
  171. package/dist/voice/speech_handle.d.ts +2 -0
  172. package/dist/voice/speech_handle.d.ts.map +1 -1
  173. package/dist/voice/speech_handle.js +8 -2
  174. package/dist/voice/speech_handle.js.map +1 -1
  175. package/dist/voice/testing/fake_llm.cjs +127 -0
  176. package/dist/voice/testing/fake_llm.cjs.map +1 -0
  177. package/dist/voice/testing/fake_llm.d.cts +30 -0
  178. package/dist/voice/testing/fake_llm.d.ts +30 -0
  179. package/dist/voice/testing/fake_llm.d.ts.map +1 -0
  180. package/dist/voice/testing/fake_llm.js +103 -0
  181. package/dist/voice/testing/fake_llm.js.map +1 -0
  182. package/dist/voice/testing/index.cjs +3 -0
  183. package/dist/voice/testing/index.cjs.map +1 -1
  184. package/dist/voice/testing/index.d.cts +1 -0
  185. package/dist/voice/testing/index.d.ts +1 -0
  186. package/dist/voice/testing/index.d.ts.map +1 -1
  187. package/dist/voice/testing/index.js +2 -0
  188. package/dist/voice/testing/index.js.map +1 -1
  189. package/dist/voice/testing/run_result.cjs +66 -15
  190. package/dist/voice/testing/run_result.cjs.map +1 -1
  191. package/dist/voice/testing/run_result.d.cts +14 -3
  192. package/dist/voice/testing/run_result.d.ts +14 -3
  193. package/dist/voice/testing/run_result.d.ts.map +1 -1
  194. package/dist/voice/testing/run_result.js +66 -15
  195. package/dist/voice/testing/run_result.js.map +1 -1
  196. package/package.json +1 -1
  197. package/src/beta/index.ts +9 -0
  198. package/src/beta/workflows/index.ts +9 -0
  199. package/src/beta/workflows/task_group.ts +194 -0
  200. package/src/cli.ts +20 -33
  201. package/src/index.ts +2 -1
  202. package/src/ipc/job_proc_lazy_main.ts +16 -5
  203. package/src/llm/chat_context.test.ts +48 -0
  204. package/src/llm/chat_context.ts +158 -0
  205. package/src/llm/index.ts +1 -0
  206. package/src/llm/provider_format/index.ts +7 -2
  207. package/src/llm/provider_format/openai.test.ts +385 -1
  208. package/src/llm/provider_format/openai.ts +103 -0
  209. package/src/llm/provider_format/utils.ts +6 -4
  210. package/src/llm/realtime.ts +1 -0
  211. package/src/llm/tool_context.ts +14 -0
  212. package/src/log.ts +5 -2
  213. package/src/stream/deferred_stream.ts +17 -6
  214. package/src/utils.test.ts +87 -0
  215. package/src/utils.ts +41 -2
  216. package/src/version.ts +1 -1
  217. package/src/voice/agent.test.ts +140 -2
  218. package/src/voice/agent.ts +200 -10
  219. package/src/voice/agent_activity.ts +466 -290
  220. package/src/voice/agent_session.ts +178 -40
  221. package/src/voice/audio_recognition.ts +4 -0
  222. package/src/voice/generation.ts +52 -23
  223. package/src/voice/index.ts +1 -1
  224. package/src/voice/room_io/room_io.ts +14 -3
  225. package/src/voice/speech_handle.ts +9 -2
  226. package/src/voice/testing/fake_llm.ts +138 -0
  227. package/src/voice/testing/index.ts +2 -0
  228. package/src/voice/testing/run_result.ts +81 -23
package/src/llm/provider_format/openai.test.ts CHANGED
@@ -11,7 +11,7 @@ import {
11
11
  FunctionCallOutput,
12
12
  } from '../chat_context.js';
13
13
  import { serializeImage } from '../utils.js';
14
- import { toChatCtx } from './openai.js';
14
+ import { toChatCtx, toResponsesChatCtx } from './openai.js';
15
15
 
16
16
  // Mock the serializeImage function
17
17
  vi.mock('../utils.js', () => ({
@@ -673,3 +673,387 @@ describe('toChatCtx', () => {
673
673
  ]);
674
674
  });
675
675
  });
676
+
677
+ describe('toResponsesChatCtx', () => {
678
+ const serializeImageMock = vi.mocked(serializeImage);
679
+
680
+ initializeLogger({ level: 'silent', pretty: false });
681
+
682
+ beforeEach(async () => {
683
+ vi.clearAllMocks();
684
+ });
685
+
686
+ it('should convert simple text messages', async () => {
687
+ const ctx = ChatContext.empty();
688
+ ctx.addMessage({ role: 'user', content: 'Hello' });
689
+ ctx.addMessage({ role: 'assistant', content: 'Hi there!' });
690
+
691
+ const result = await toResponsesChatCtx(ctx);
692
+
693
+ expect(result).toHaveLength(2);
694
+ expect(result[0]).toEqual({ role: 'user', content: 'Hello' });
695
+ expect(result[1]).toEqual({ role: 'assistant', content: 'Hi there!' });
696
+ });
697
+
698
+ it('should handle system messages', async () => {
699
+ const ctx = ChatContext.empty();
700
+ ctx.addMessage({ role: 'system', content: 'You are a helpful assistant' });
701
+ ctx.addMessage({ role: 'user', content: 'Hello' });
702
+
703
+ const result = await toResponsesChatCtx(ctx);
704
+
705
+ expect(result).toHaveLength(2);
706
+ expect(result[0]).toEqual({ role: 'system', content: 'You are a helpful assistant' });
707
+ expect(result[1]).toEqual({ role: 'user', content: 'Hello' });
708
+ });
709
+
710
+ it('should handle multi-line text content', async () => {
711
+ const ctx = ChatContext.empty();
712
+ ctx.addMessage({ role: 'user', content: ['Line 1', 'Line 2', 'Line 3'] });
713
+
714
+ const result = await toResponsesChatCtx(ctx);
715
+
716
+ expect(result).toHaveLength(1);
717
+ expect(result[0]).toEqual({ role: 'user', content: 'Line 1\nLine 2\nLine 3' });
718
+ });
719
+
720
+ it('should convert images to input_image format with external URL', async () => {
721
+ serializeImageMock.mockResolvedValue({
722
+ inferenceDetail: 'high',
723
+ externalUrl: 'https://example.com/image.jpg',
724
+ });
725
+
726
+ const ctx = ChatContext.empty();
727
+ ctx.addMessage({
728
+ role: 'user',
729
+ content: [
730
+ {
731
+ id: 'img1',
732
+ type: 'image_content',
733
+ image: 'https://example.com/image.jpg',
734
+ inferenceDetail: 'high',
735
+ _cache: {},
736
+ },
737
+ ],
738
+ });
739
+
740
+ const result = await toResponsesChatCtx(ctx);
741
+
742
+ expect(result).toEqual([
743
+ {
744
+ role: 'user',
745
+ content: [
746
+ {
747
+ type: 'input_image',
748
+ image_url: 'https://example.com/image.jpg',
749
+ detail: 'high',
750
+ },
751
+ ],
752
+ },
753
+ ]);
754
+ });
755
+
756
+ it('should convert images to input_image format with base64 data', async () => {
757
+ serializeImageMock.mockResolvedValue({
758
+ inferenceDetail: 'auto',
759
+ mimeType: 'image/png',
760
+ base64Data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
761
+ });
762
+
763
+ const ctx = ChatContext.empty();
764
+ ctx.addMessage({
765
+ role: 'user',
766
+ content: [
767
+ {
768
+ id: 'img1',
769
+ type: 'image_content',
770
+ image: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
771
+ inferenceDetail: 'auto',
772
+ _cache: {},
773
+ },
774
+ ],
775
+ });
776
+
777
+ const result = await toResponsesChatCtx(ctx);
778
+
779
+ expect(result).toEqual([
780
+ {
781
+ role: 'user',
782
+ content: [
783
+ {
784
+ type: 'input_image',
785
+ image_url: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
786
+ detail: 'auto',
787
+ },
788
+ ],
789
+ },
790
+ ]);
791
+ });
792
+
793
+ it('should handle mixed content with text and image using input_text', async () => {
794
+ serializeImageMock.mockResolvedValue({
795
+ inferenceDetail: 'high',
796
+ externalUrl: 'https://example.com/image.jpg',
797
+ });
798
+
799
+ const ctx = ChatContext.empty();
800
+ ctx.addMessage({
801
+ role: 'user',
802
+ content: [
803
+ 'Check this out:',
804
+ {
805
+ id: 'img1',
806
+ type: 'image_content',
807
+ image: 'https://example.com/image.jpg',
808
+ inferenceDetail: 'high',
809
+ _cache: {},
810
+ },
811
+ ],
812
+ });
813
+
814
+ const result = await toResponsesChatCtx(ctx);
815
+
816
+ expect(result).toEqual([
817
+ {
818
+ role: 'user',
819
+ content: [
820
+ {
821
+ type: 'input_image',
822
+ image_url: 'https://example.com/image.jpg',
823
+ detail: 'high',
824
+ },
825
+ { type: 'input_text', text: 'Check this out:' },
826
+ ],
827
+ },
828
+ ]);
829
+ });
830
+
831
+ it('should handle tool calls as top-level function_call items', async () => {
832
+ const ctx = ChatContext.empty();
833
+
834
+ const msg = ctx.addMessage({ role: 'assistant', content: 'Let me help you.' });
835
+ const toolCall = FunctionCall.create({
836
+ id: msg.id + '/tool_1',
837
+ callId: 'call_123',
838
+ name: 'get_weather',
839
+ args: '{"location": "Paris"}',
840
+ });
841
+ const toolOutput = FunctionCallOutput.create({
842
+ callId: 'call_123',
843
+ output: '{"temperature": 20}',
844
+ isError: false,
845
+ });
846
+
847
+ ctx.insert([toolCall, toolOutput]);
848
+
849
+ const result = await toResponsesChatCtx(ctx);
850
+
851
+ expect(result).toEqual([
852
+ { role: 'assistant', content: 'Let me help you.' },
853
+ {
854
+ type: 'function_call',
855
+ call_id: 'call_123',
856
+ name: 'get_weather',
857
+ arguments: '{"location": "Paris"}',
858
+ },
859
+ {
860
+ type: 'function_call_output',
861
+ call_id: 'call_123',
862
+ output: '{"temperature": 20}',
863
+ },
864
+ ]);
865
+ });
866
+
867
+ it('should handle tool calls without an accompanying message', async () => {
868
+ const ctx = ChatContext.empty();
869
+
870
+ const toolCall = new FunctionCall({
871
+ id: 'func_1',
872
+ callId: 'call_456',
873
+ name: 'calculate',
874
+ args: '{"a": 5, "b": 3}',
875
+ });
876
+ const toolOutput = new FunctionCallOutput({
877
+ callId: 'call_456',
878
+ output: '{"result": 8}',
879
+ isError: false,
880
+ });
881
+
882
+ ctx.insert([toolCall, toolOutput]);
883
+
884
+ const result = await toResponsesChatCtx(ctx);
885
+
886
+ expect(result).toEqual([
887
+ {
888
+ type: 'function_call',
889
+ call_id: 'call_456',
890
+ name: 'calculate',
891
+ arguments: '{"a": 5, "b": 3}',
892
+ },
893
+ {
894
+ type: 'function_call_output',
895
+ call_id: 'call_456',
896
+ output: '{"result": 8}',
897
+ },
898
+ ]);
899
+ });
900
+
901
+ it('should handle multiple tool calls as separate function_call items', async () => {
902
+ const ctx = ChatContext.empty();
903
+
904
+ const msg = ctx.addMessage({ role: 'assistant', content: "I'll check both." });
905
+ const toolCall1 = new FunctionCall({
906
+ id: msg.id + '/tool_1',
907
+ callId: 'call_1',
908
+ name: 'get_weather',
909
+ args: '{"location": "NYC"}',
910
+ });
911
+ const toolCall2 = new FunctionCall({
912
+ id: msg.id + '/tool_2',
913
+ callId: 'call_2',
914
+ name: 'get_weather',
915
+ args: '{"location": "LA"}',
916
+ });
917
+ const toolOutput1 = new FunctionCallOutput({
918
+ callId: 'call_1',
919
+ output: '{"temperature": 65}',
920
+ isError: false,
921
+ });
922
+ const toolOutput2 = new FunctionCallOutput({
923
+ callId: 'call_2',
924
+ output: '{"temperature": 78}',
925
+ isError: false,
926
+ });
927
+
928
+ ctx.insert([toolCall1, toolCall2, toolOutput1, toolOutput2]);
929
+
930
+ const result = await toResponsesChatCtx(ctx);
931
+
932
+ expect(result).toEqual([
933
+ { role: 'assistant', content: "I'll check both." },
934
+ {
935
+ type: 'function_call',
936
+ call_id: 'call_1',
937
+ name: 'get_weather',
938
+ arguments: '{"location": "NYC"}',
939
+ },
940
+ {
941
+ type: 'function_call',
942
+ call_id: 'call_2',
943
+ name: 'get_weather',
944
+ arguments: '{"location": "LA"}',
945
+ },
946
+ {
947
+ type: 'function_call_output',
948
+ call_id: 'call_1',
949
+ output: '{"temperature": 65}',
950
+ },
951
+ {
952
+ type: 'function_call_output',
953
+ call_id: 'call_2',
954
+ output: '{"temperature": 78}',
955
+ },
956
+ ]);
957
+ });
958
+
959
+ it('should skip empty groups', async () => {
960
+ const ctx = ChatContext.empty();
961
+ ctx.addMessage({ role: 'user', content: 'Hello', createdAt: 1000 });
962
+
963
+ const orphanOutput = new FunctionCallOutput({
964
+ callId: 'orphan_call',
965
+ output: 'This should be ignored',
966
+ isError: false,
967
+ createdAt: 2000,
968
+ });
969
+ ctx.insert(orphanOutput);
970
+
971
+ ctx.addMessage({ role: 'assistant', content: 'Hi!', createdAt: 3000 });
972
+
973
+ const result = await toResponsesChatCtx(ctx);
974
+
975
+ expect(result).toHaveLength(2);
976
+ expect(result).toContainEqual({ role: 'user', content: 'Hello' });
977
+ expect(result).toContainEqual({ role: 'assistant', content: 'Hi!' });
978
+ });
979
+
980
+ it('should filter out agent handoff items', async () => {
981
+ const ctx = ChatContext.empty();
982
+
983
+ ctx.addMessage({ role: 'user', content: 'Hello' });
984
+ ctx.insert(new AgentHandoffItem({ oldAgentId: 'agent_1', newAgentId: 'agent_2' }));
985
+ ctx.addMessage({ role: 'assistant', content: 'Hi there!' });
986
+
987
+ const result = await toResponsesChatCtx(ctx);
988
+
989
+ expect(result).toEqual([
990
+ { role: 'user', content: 'Hello' },
991
+ { role: 'assistant', content: 'Hi there!' },
992
+ ]);
993
+ });
994
+
995
+ it('should cache serialized images', async () => {
996
+ serializeImageMock.mockResolvedValue({
997
+ inferenceDetail: 'high',
998
+ mimeType: 'image/png',
999
+ base64Data: 'cached-data',
1000
+ });
1001
+
1002
+ const imageContent = {
1003
+ id: 'img1',
1004
+ type: 'image_content' as const,
1005
+ image: 'https://example.com/image.jpg',
1006
+ inferenceDetail: 'high' as const,
1007
+ _cache: {},
1008
+ };
1009
+
1010
+ const ctx = ChatContext.empty();
1011
+ ctx.addMessage({ role: 'user', content: [imageContent] });
1012
+
1013
+ await toResponsesChatCtx(ctx);
1014
+ await toResponsesChatCtx(ctx);
1015
+
1016
+ expect(serializeImageMock).toHaveBeenCalledTimes(1);
1017
+ expect(imageContent._cache).toHaveProperty('serialized_image');
1018
+ });
1019
+
1020
+ it('should throw error for unsupported content type', async () => {
1021
+ const ctx = ChatContext.empty();
1022
+ ctx.addMessage({
1023
+ role: 'user',
1024
+ content: [
1025
+ {
1026
+ type: 'audio_content',
1027
+ frame: [],
1028
+ },
1029
+ ],
1030
+ });
1031
+
1032
+ await expect(toResponsesChatCtx(ctx)).rejects.toThrow(
1033
+ 'Unsupported content type: audio_content',
1034
+ );
1035
+ });
1036
+
1037
+ it('should throw error when serialized image has no data', async () => {
1038
+ serializeImageMock.mockResolvedValue({
1039
+ inferenceDetail: 'high',
1040
+ // No base64Data or externalUrl
1041
+ });
1042
+
1043
+ const ctx = ChatContext.empty();
1044
+ ctx.addMessage({
1045
+ role: 'user',
1046
+ content: [
1047
+ {
1048
+ id: 'img1',
1049
+ type: 'image_content',
1050
+ image: 'invalid-image',
1051
+ inferenceDetail: 'high',
1052
+ _cache: {},
1053
+ },
1054
+ ],
1055
+ });
1056
+
1057
+ await expect(toResponsesChatCtx(ctx)).rejects.toThrow('Serialized image has no data bytes');
1058
+ });
1059
+ });
package/src/llm/provider_format/openai.ts CHANGED
@@ -144,3 +144,106 @@ async function toImageContent(content: ImageContent) {
144
144
  },
145
145
  };
146
146
  }
147
+
148
+ async function toResponsesImageContent(content: ImageContent) {
149
+ const cacheKey = 'serialized_image';
150
+ let serialized: SerializedImage;
151
+
152
+ if (content._cache[cacheKey] === undefined) {
153
+ serialized = await serializeImage(content);
154
+ content._cache[cacheKey] = serialized;
155
+ }
156
+ serialized = content._cache[cacheKey];
157
+
158
+ if (serialized.externalUrl) {
159
+ return {
160
+ type: 'input_image' as const,
161
+ image_url: serialized.externalUrl,
162
+ detail: serialized.inferenceDetail,
163
+ };
164
+ }
165
+
166
+ if (serialized.base64Data === undefined) {
167
+ throw new Error('Serialized image has no data bytes');
168
+ }
169
+
170
+ return {
171
+ type: 'input_image' as const,
172
+ image_url: `data:${serialized.mimeType};base64,${serialized.base64Data}`,
173
+ detail: serialized.inferenceDetail,
174
+ };
175
+ }
176
+
177
+ export async function toResponsesChatCtx(
178
+ chatCtx: ChatContext,
179
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
180
+ injectDummyUserMessage: boolean = true,
181
+ ) {
182
+ const itemGroups = groupToolCalls(chatCtx);
183
+ const messages: Record<string, any>[] = []; // eslint-disable-line @typescript-eslint/no-explicit-any
184
+
185
+ for (const group of itemGroups) {
186
+ if (group.isEmpty) continue;
187
+
188
+ if (group.message) {
189
+ messages.push(await toResponsesChatItem(group.message));
190
+ }
191
+
192
+ for (const toolCall of group.toolCalls) {
193
+ messages.push({
194
+ type: 'function_call',
195
+ call_id: toolCall.callId,
196
+ name: toolCall.name,
197
+ arguments: toolCall.args,
198
+ });
199
+ }
200
+
201
+ for (const toolOutput of group.toolOutputs) {
202
+ messages.push(await toResponsesChatItem(toolOutput));
203
+ }
204
+ }
205
+
206
+ return messages;
207
+ }
208
+
209
+ async function toResponsesChatItem(item: ChatItem) {
210
+ if (item.type === 'message') {
211
+ const listContent: Record<string, any>[] = []; // eslint-disable-line @typescript-eslint/no-explicit-any
212
+ let textContent = '';
213
+
214
+ for (const content of item.content) {
215
+ if (typeof content === 'string') {
216
+ if (textContent) textContent += '\n';
217
+ textContent += content;
218
+ } else if (content.type === 'image_content') {
219
+ listContent.push(await toResponsesImageContent(content));
220
+ } else {
221
+ throw new Error(`Unsupported content type: ${content.type}`);
222
+ }
223
+ }
224
+
225
+ const content =
226
+ listContent.length == 0
227
+ ? textContent
228
+ : textContent.length == 0
229
+ ? listContent
230
+ : [...listContent, { type: 'input_text', text: textContent }];
231
+
232
+ return { role: item.role, content };
233
+ } else if (item.type === 'function_call') {
234
+ return {
235
+ type: 'function_call',
236
+ call_id: item.callId,
237
+ name: item.name,
238
+ arguments: item.args,
239
+ };
240
+ } else if (item.type === 'function_call_output') {
241
+ return {
242
+ type: 'function_call_output',
243
+ call_id: item.callId,
244
+ output: item.output,
245
+ };
246
+ }
247
+
248
+ throw new Error(`Unsupported item type: ${item['type']}`);
249
+ }
package/src/llm/provider_format/utils.ts CHANGED
@@ -56,12 +56,14 @@ class ChatItemGroup {
56
56
  }
57
57
 
58
58
  removeInvalidToolCalls() {
59
- if (this.toolCalls.length === this.toolOutputs.length) {
60
- return;
61
- }
62
-
63
59
  const toolCallIds = new Set(this.toolCalls.map((call) => call.callId));
64
60
  const toolOutputIds = new Set(this.toolOutputs.map((output) => output.callId));
61
+ const sameIds =
62
+ toolCallIds.size === toolOutputIds.size &&
63
+ [...toolCallIds].every((id) => toolOutputIds.has(id));
64
+ if (this.toolCalls.length === this.toolOutputs.length && sameIds) {
65
+ return;
66
+ }
65
67
 
66
68
  // intersection of tool call ids and tool output ids
67
69
  const validCallIds = intersection(toolCallIds, toolOutputIds);
package/src/llm/realtime.ts CHANGED
@@ -48,6 +48,7 @@ export interface RealtimeCapabilities {
48
48
  userTranscription: boolean;
49
49
  autoToolReplyGeneration: boolean;
50
50
  audioOutput: boolean;
51
+ manualFunctionCalls: boolean;
51
52
  }
52
53
 
53
54
  export interface InputTranscriptionCompleted {
package/src/llm/tool_context.ts CHANGED
@@ -80,6 +80,13 @@ export class ToolError extends Error {
80
80
  }
81
81
  }
82
82
 
83
+ export const ToolFlag = {
84
+ NONE: 0,
85
+ IGNORE_ON_ENTER: 1 << 0,
86
+ } as const;
87
+
88
+ export type ToolFlag = (typeof ToolFlag)[keyof typeof ToolFlag];
89
+
83
90
  export interface AgentHandoff {
84
91
  /**
85
92
  * The agent to handoff to.
@@ -178,6 +185,8 @@ export interface FunctionTool<
178
185
  */
179
186
  execute: ToolExecuteFunction<Parameters, UserData, Result>;
180
187
 
188
+ flags: number;
189
+
181
190
  [FUNCTION_TOOL_SYMBOL]: true;
182
191
  }
183
192
 
@@ -242,10 +251,12 @@ export function tool<
242
251
  description,
243
252
  parameters,
244
253
  execute,
254
+ flags,
245
255
  }: {
246
256
  description: string;
247
257
  parameters: Schema;
248
258
  execute: ToolExecuteFunction<InferToolInput<Schema>, UserData, Result>;
259
+ flags?: number;
249
260
  }): FunctionTool<InferToolInput<Schema>, UserData, Result>;
250
261
 
251
262
  /**
@@ -254,10 +265,12 @@ export function tool<
254
265
  export function tool<UserData = UnknownUserData, Result = unknown>({
255
266
  description,
256
267
  execute,
268
+ flags,
257
269
  }: {
258
270
  description: string;
259
271
  parameters?: never;
260
272
  execute: ToolExecuteFunction<Record<string, never>, UserData, Result>;
273
+ flags?: number;
261
274
  }): FunctionTool<Record<string, never>, UserData, Result>;
262
275
 
263
276
  /**
@@ -295,6 +308,7 @@ export function tool(tool: any): any {
295
308
  description: tool.description,
296
309
  parameters,
297
310
  execute: tool.execute,
311
+ flags: tool.flags ?? ToolFlag.NONE,
298
312
  [TOOL_SYMBOL]: true,
299
313
  [FUNCTION_TOOL_SYMBOL]: true,
300
314
  };
package/src/log.ts CHANGED
@@ -44,7 +44,7 @@ export const log = () => {
44
44
  export const initializeLogger = ({ pretty, level }: LoggerOptions) => {
45
45
  globals[LOGGER_OPTIONS_KEY] = { pretty, level };
46
46
  globals[LOGGER_KEY] = pino(
47
- { level: level || 'info' },
47
+ { level: level || 'info', serializers: { error: pino.stdSerializers.err } },
48
48
  pretty ? pinoPretty({ colorize: true }) : process.stdout,
49
49
  );
50
50
  };
@@ -90,5 +90,8 @@ export const enableOtelLogging = () => {
90
90
  { stream: new OtelDestination(), level: 'debug' },
91
91
  ];
92
92
 
93
- globals[LOGGER_KEY] = pino({ level: logLevel }, multistream(streams));
93
+ globals[LOGGER_KEY] = pino(
94
+ { level: logLevel, serializers: { error: pino.stdSerializers.err } },
95
+ multistream(streams),
96
+ );
94
97
  };
package/src/stream/deferred_stream.ts CHANGED
@@ -59,16 +59,17 @@ export class DeferredReadableStream<T> {
59
59
  throw new Error('Stream source already set');
60
60
  }
61
61
 
62
- this.sourceReader = source.getReader();
63
- this.pump();
62
+ const sourceReader = source.getReader();
63
+ this.sourceReader = sourceReader;
64
+ void this.pump(sourceReader);
64
65
  }
65
66
 
66
- private async pump() {
67
+ private async pump(sourceReader: ReadableStreamDefaultReader<T>) {
67
68
  let sourceError: unknown;
68
69
 
69
70
  try {
70
71
  while (true) {
71
- const { done, value } = await this.sourceReader!.read();
72
+ const { done, value } = await sourceReader.read();
72
73
  if (done) break;
73
74
  await this.writer.write(value);
74
75
  }
@@ -81,7 +82,7 @@ export class DeferredReadableStream<T> {
81
82
  // any other error from source will be propagated to the consumer
82
83
  if (sourceError) {
83
84
  try {
84
- this.writer.abort(sourceError);
85
+ await this.writer.abort(sourceError);
85
86
  } catch (e) {
86
87
  // ignore if writer is already closed
87
88
  }
@@ -118,10 +119,20 @@ export class DeferredReadableStream<T> {
118
119
  return;
119
120
  }
120
121
 
122
+ const sourceReader = this.sourceReader!;
123
+ // Clear source first so future setSource() calls can reattach cleanly.
124
+ this.sourceReader = undefined;
125
+
121
126
  // release lock will make any pending read() throw TypeError
122
127
  // which are expected, and we intentionally catch those error
123
128
  // using isStreamReaderReleaseError
124
129
  // this will unblock any pending read() inside the async for loop
125
- this.sourceReader!.releaseLock();
130
+ try {
131
+ sourceReader.releaseLock();
132
+ } catch (e) {
133
+ if (!isStreamReaderReleaseError(e)) {
134
+ throw e;
135
+ }
136
+ }
126
137
  }
127
138
  }