dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/openai/realtime/events.py (new file)
@@ -0,0 +1,1106 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Event models and data structures for OpenAI Realtime API communication."""
8
+
9
+ import json
10
+ import uuid
11
+ from typing import Any, Dict, List, Literal, Optional, Union
12
+
13
+ from pydantic import BaseModel, ConfigDict, Field
14
+
15
+ #
16
+ # session properties
17
+ #
18
+
19
+
20
class AudioFormat(BaseModel):
    """Common parent for the audio format models.

    Parameters:
        type: Identifier of the audio format.
    """

    type: str
24
+
25
+
26
class PCMAudioFormat(AudioFormat):
    """Audio format model for PCM audio.

    Parameters:
        type: Format identifier, fixed to "audio/pcm".
        rate: Sample rate, fixed to 24000.
    """

    type: Literal["audio/pcm"] = "audio/pcm"
    rate: Literal[24000] = 24000
36
+
37
+
38
class PCMUAudioFormat(AudioFormat):
    """Audio format model for PCMU (G.711 μ-law) audio.

    Parameters:
        type: Format identifier, fixed to "audio/pcmu".
    """

    type: Literal["audio/pcmu"] = "audio/pcmu"
46
+
47
+
48
class PCMAAudioFormat(AudioFormat):
    """Audio format model for PCMA (G.711 A-law) audio.

    Parameters:
        type: Format identifier, fixed to "audio/pcma".
    """

    type: Literal["audio/pcma"] = "audio/pcma"
56
+
57
+
58
class InputAudioTranscription(BaseModel):
    """Configuration for audio transcription settings.

    Parameters:
        model: Transcription model to use. Defaults to "gpt-4o-transcribe".
        language: Optional language code for transcription.
        prompt: Optional transcription hint text.
    """

    model: str = "gpt-4o-transcribe"
    # The defaults mirror the ones in __init__. Without them the fields are
    # declared as required, so the generated schema (and any construction
    # path that does not go through __init__, such as model_construct)
    # disagrees with the constructor, which treats both as optional.
    language: Optional[str] = None
    prompt: Optional[str] = None

    def __init__(
        self,
        model: Optional[str] = "gpt-4o-transcribe",
        language: Optional[str] = None,
        prompt: Optional[str] = None,
    ):
        """Initialize InputAudioTranscription.

        Args:
            model: Transcription model to use (e.g., "gpt-4o-transcribe", "whisper-1").
            language: Optional language code for transcription.
            prompt: Optional transcription hint text.
        """
        super().__init__(model=model, language=language, prompt=prompt)
79
+
80
+
81
class TurnDetection(BaseModel):
    """Settings for server-side voice activity detection.

    Parameters:
        type: Detection type, must be "server_vad".
        threshold: Voice activity detection threshold (0.0-1.0). Defaults to 0.5.
        prefix_padding_ms: Milliseconds of padding before speech starts. Defaults to 300.
        silence_duration_ms: Milliseconds of silence that mark the end of speech.
            Defaults to 500.
    """

    type: Optional[Literal["server_vad"]] = "server_vad"
    threshold: Optional[float] = 0.5
    prefix_padding_ms: Optional[int] = 300
    silence_duration_ms: Optional[int] = 500
95
+
96
+
97
class SemanticTurnDetection(BaseModel):
    """Settings for semantic-based turn detection.

    Parameters:
        type: Detection type, must be "semantic_vad".
        eagerness: Eagerness level of turn detection: "low", "medium", "high", or "auto".
        create_response: Whether a response is created automatically on turn detection.
        interrupt_response: Whether an ongoing response is interrupted on turn detection.
    """

    type: Optional[Literal["semantic_vad"]] = "semantic_vad"
    eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
    create_response: Optional[bool] = None
    interrupt_response: Optional[bool] = None
111
+
112
+
113
class InputAudioNoiseReduction(BaseModel):
    """Settings for noise reduction on input audio.

    Parameters:
        type: Noise reduction type, chosen per microphone scenario
            ("near_field" or "far_field").
    """

    # No default is declared, so a value (possibly None) must be supplied.
    type: Optional[Literal["near_field", "far_field"]]
121
+
122
+
123
class AudioInput(BaseModel):
    """Settings for the audio input side of a session.

    Parameters:
        format: Format of the input audio.
        transcription: Input audio transcription settings.
        noise_reduction: Input audio noise reduction settings.
        turn_detection: Turn detection settings, or False to disable.
    """

    format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
    transcription: Optional[InputAudioTranscription] = None
    noise_reduction: Optional[InputAudioNoiseReduction] = None
    turn_detection: Optional[Union[TurnDetection, SemanticTurnDetection, bool]] = None
137
+
138
+
139
class AudioOutput(BaseModel):
    """Settings for the audio output side of a session.

    Parameters:
        format: Format of the output audio.
        voice: Voice the model responds with.
        speed: Speed of the model's spoken response.
    """

    format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
    voice: Optional[str] = None
    speed: Optional[float] = None
151
+
152
+
153
class AudioConfiguration(BaseModel):
    """Combined input and output audio settings.

    Parameters:
        input: Settings for input audio.
        output: Settings for output audio.
    """

    input: Optional[AudioInput] = None
    output: Optional[AudioOutput] = None
163
+
164
+
165
class SessionProperties(BaseModel):
    """Properties that configure an OpenAI Realtime session.

    Parameters:
        type: Session type, always "realtime".
        object: Object type identifier, always "realtime.session".
        id: Unique identifier of the session.
        model: Realtime model used for the session.
        output_modalities: Modalities the model can respond with.
        instructions: System instructions for the assistant.
        audio: Input and output audio configuration.
        tools: Function tools available to the assistant.
        tool_choice: Tool usage strategy ("auto", "none", or "required").
        max_output_tokens: Maximum tokens in a response, or "inf" for unlimited.
        tracing: Tracing configuration options.
        prompt: Reference to a prompt template and its variables.
        expires_at: Session expiration timestamp.
        include: Additional fields to include in server outputs.
    """

    type: Optional[Literal["realtime"]] = "realtime"
    object: Optional[Literal["realtime.session"]] = None
    id: Optional[str] = None
    model: Optional[str] = None
    output_modalities: Optional[List[Literal["text", "audio"]]] = None
    instructions: Optional[str] = None
    audio: Optional[AudioConfiguration] = None
    tools: Optional[List[Dict]] = None
    tool_choice: Optional[Literal["auto", "none", "required"]] = None
    max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
    tracing: Optional[Union[Literal["auto"], Dict]] = None
    prompt: Optional[Dict] = None
    expires_at: Optional[int] = None
    include: Optional[List[str]] = None
199
+
200
+
201
+ #
202
+ # context
203
+ #
204
+
205
+
206
class ItemContent(BaseModel):
    """A single piece of content inside a conversation item.

    Parameters:
        type: Content type (text, audio, input_text, input_audio, output_text,
            or output_audio).
        text: Text for text-based content.
        audio: Base64-encoded audio data for audio content.
        transcript: Transcribed text for audio content.
    """

    type: Literal["text", "audio", "input_text", "input_audio", "output_text", "output_audio"]
    text: Optional[str] = None
    # Audio is carried as a base64-encoded string.
    audio: Optional[str] = None
    transcript: Optional[str] = None
220
+
221
+
222
class ConversationItem(BaseModel):
    """An item of a realtime session's conversation.

    Parameters:
        id: Unique identifier of the item; generated when not provided.
        object: Object type identifier for the realtime API.
        type: Item type (message, function_call, or function_call_output).
        status: Current status of the item.
        role: Speaker role for message items (user, assistant, or system).
        content: List of content parts for message items.
        call_id: Function call identifier for function_call items.
        name: Function name for function_call items.
        arguments: Function arguments as a JSON string for function_call items.
        output: Function output as a JSON string for function_call_output items.
    """

    # uuid4().hex is already a str, so no extra conversion is required.
    id: str = Field(default_factory=lambda: uuid.uuid4().hex)
    object: Optional[Literal["realtime.item"]] = None
    type: Literal["message", "function_call", "function_call_output"]
    status: Optional[Literal["completed", "in_progress", "incomplete"]] = None

    # Message items carry role and content.
    role: Optional[Literal["user", "assistant", "system"]] = None
    content: Optional[List[ItemContent]] = None

    # Function-call related items carry the following four fields.
    call_id: Optional[str] = None
    name: Optional[str] = None
    arguments: Optional[str] = None
    output: Optional[str] = None
250
+
251
+
252
class RealtimeConversation(BaseModel):
    """Identifies a realtime conversation.

    Parameters:
        id: Unique identifier of the conversation.
        object: Object type identifier, always "realtime.conversation".
    """

    id: str
    object: Literal["realtime.conversation"]
262
+
263
+
264
class ResponseProperties(BaseModel):
    """Properties that configure a single assistant response.

    Parameters:
        output_modalities: Output modalities for the response. Must be either
            ["text"] or ["audio"]. Defaults to ["audio"].
        instructions: Instructions specific to this response.
        audio: Audio configuration for this response.
        tools: Tools available for this response.
        tool_choice: Tool usage strategy for this response.
        temperature: Sampling temperature for this response.
        max_output_tokens: Maximum tokens for this response.
    """

    output_modalities: Optional[List[Literal["text", "audio"]]] = ["audio"]
    instructions: Optional[str] = None
    audio: Optional[AudioConfiguration] = None
    tools: Optional[List[Dict]] = None
    tool_choice: Optional[Literal["auto", "none", "required"]] = None
    temperature: Optional[float] = None
    max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
284
+
285
+
286
+ #
287
+ # error class
288
+ #
289
class RealtimeError(BaseModel):
    """Error details reported by the realtime API.

    Parameters:
        type: Identifier of the error type.
        code: Specific error code. Defaults to an empty string.
        message: Human-readable description of the error.
        param: Name of the parameter that caused the error, if applicable.
        event_id: ID of the event associated with the error, if applicable.
    """

    type: str
    code: Optional[str] = ""
    message: str
    param: Optional[str] = None
    event_id: Optional[str] = None
305
+
306
+
307
+ #
308
+ # client events
309
+ #
310
+
311
+
312
class ClientEvent(BaseModel):
    """Parent class of the events a client sends to the realtime API.

    Parameters:
        event_id: Unique identifier of the event; a UUID4 string is generated
            when none is provided.
    """

    event_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
320
+
321
+
322
class SessionUpdateEvent(ClientEvent):
    """Event to update session properties.

    Parameters:
        type: Event type, always "session.update".
        session: Updated session properties.
    """

    type: Literal["session.update"] = "session.update"
    session: SessionProperties

    def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
        """Serialize the event to a dictionary.

        Handles special serialization for turn_detection where False becomes null.

        Args:
            *args: Positional arguments passed to parent model_dump.
            **kwargs: Keyword arguments passed to parent model_dump.

        Returns:
            Dictionary representation of the event. If
            ``session.audio.input.turn_detection`` was ``False`` it is
            replaced by ``None``.
        """
        dump = super().model_dump(*args, **kwargs)

        # Walk down to session.audio.input defensively: the caller's
        # include/exclude arguments may have removed any level (including
        # "session" itself), and unset levels are dumped as None.
        session = dump.get("session") or {}
        audio = session.get("audio") or {}
        audio_input = audio.get("input")

        # Only an explicit False is rewritten; True, None and dict values
        # are left untouched.
        if audio_input and audio_input.get("turn_detection") is False:
            audio_input["turn_detection"] = None

        return dump
355
+
356
+
357
class InputAudioBufferAppendEvent(ClientEvent):
    """Client event that appends audio data to the input buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.append".
        audio: Audio data to append, base64-encoded.
    """

    type: Literal["input_audio_buffer.append"] = "input_audio_buffer.append"
    # Audio is carried as a base64-encoded string.
    audio: str
367
+
368
+
369
class InputAudioBufferCommitEvent(ClientEvent):
    """Client event that commits the current input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.commit".
    """

    type: Literal["input_audio_buffer.commit"] = "input_audio_buffer.commit"
377
+
378
+
379
class InputAudioBufferClearEvent(ClientEvent):
    """Client event that clears the input audio buffer.

    Parameters:
        type: Event type, always "input_audio_buffer.clear".
    """

    type: Literal["input_audio_buffer.clear"] = "input_audio_buffer.clear"
387
+
388
+
389
class ConversationItemCreateEvent(ClientEvent):
    """Client event that creates a new conversation item.

    Parameters:
        type: Event type, always "conversation.item.create".
        previous_item_id: ID of the item to insert after, if any.
        item: Conversation item to create.
    """

    type: Literal["conversation.item.create"] = "conversation.item.create"
    previous_item_id: Optional[str] = None
    item: ConversationItem
401
+
402
+
403
class ConversationItemTruncateEvent(ClientEvent):
    """Client event that truncates the audio content of a conversation item.

    Parameters:
        type: Event type, always "conversation.item.truncate".
        item_id: ID of the item to truncate.
        content_index: Index, within the item, of the content to truncate.
        audio_end_ms: End time of the truncated audio, in milliseconds.
    """

    type: Literal["conversation.item.truncate"] = "conversation.item.truncate"
    item_id: str
    content_index: int
    audio_end_ms: int
417
+
418
+
419
class ConversationItemDeleteEvent(ClientEvent):
    """Client event that deletes a conversation item.

    Parameters:
        type: Event type, always "conversation.item.delete".
        item_id: ID of the item to delete.
    """

    type: Literal["conversation.item.delete"] = "conversation.item.delete"
    item_id: str
429
+
430
+
431
class ConversationItemRetrieveEvent(ClientEvent):
    """Client event that retrieves a conversation item by its ID.

    Parameters:
        type: Event type, always "conversation.item.retrieve".
        item_id: ID of the item to retrieve.
    """

    type: Literal["conversation.item.retrieve"] = "conversation.item.retrieve"
    item_id: str
441
+
442
+
443
class ResponseCreateEvent(ClientEvent):
    """Client event that requests a new assistant response.

    Parameters:
        type: Event type, always "response.create".
        response: Optional configuration properties for the response.
    """

    type: Literal["response.create"] = "response.create"
    response: Optional[ResponseProperties] = None
453
+
454
+
455
class ResponseCancelEvent(ClientEvent):
    """Client event that cancels the current assistant response.

    Parameters:
        type: Event type, always "response.cancel".
    """

    type: Literal["response.cancel"] = "response.cancel"
463
+
464
+
465
+ #
466
+ # server events
467
+ #
468
+
469
+
470
class ServerEvent(BaseModel):
    """Parent class of the events received from the realtime API.

    Parameters:
        event_id: Unique identifier of the event.
        type: Type of the server event.
    """

    # Allow fields whose types pydantic has no built-in validation for.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    event_id: str
    type: str
482
+
483
+
484
class SessionCreatedEvent(ServerEvent):
    """Server event reporting that a session was created.

    Parameters:
        type: Event type, always "session.created".
        session: Properties of the created session.
    """

    type: Literal["session.created"]
    session: SessionProperties
494
+
495
+
496
class SessionUpdatedEvent(ServerEvent):
    """Server event reporting that a session was updated.

    Parameters:
        type: Event type, always "session.updated".
        session: Properties of the updated session.
    """

    type: Literal["session.updated"]
    session: SessionProperties
506
+
507
+
508
class ConversationCreated(ServerEvent):
    """Server event reporting that a conversation was created.

    Parameters:
        type: Event type, always "conversation.created".
        conversation: The conversation that was created.
    """

    type: Literal["conversation.created"]
    conversation: RealtimeConversation
518
+
519
+
520
class ConversationItemAdded(ServerEvent):
    """Server event reporting that a conversation item was added.

    Parameters:
        type: Event type, always "conversation.item.added".
        previous_item_id: ID of the previous item, if any.
        item: The conversation item that was added.
    """

    type: Literal["conversation.item.added"]
    previous_item_id: Optional[str] = None
    item: ConversationItem
532
+
533
+
534
class ConversationItemDone(ServerEvent):
    """Server event reporting that a conversation item finished processing.

    Parameters:
        type: Event type, always "conversation.item.done".
        previous_item_id: ID of the previous item, if any.
        item: The completed conversation item.
    """

    type: Literal["conversation.item.done"]
    previous_item_id: Optional[str] = None
    item: ConversationItem
546
+
547
+
548
class ConversationItemInputAudioTranscriptionDelta(ServerEvent):
    """Server event carrying an incremental piece of input audio transcription.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.delta".
        item_id: ID of the conversation item being transcribed.
        content_index: Index of the content within the item.
        delta: Incremental transcription text.
    """

    type: Literal["conversation.item.input_audio_transcription.delta"]
    item_id: str
    content_index: int
    delta: str
562
+
563
+
564
class ConversationItemInputAudioTranscriptionCompleted(ServerEvent):
    """Server event reporting that input audio transcription has completed.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.completed".
        item_id: ID of the conversation item that was transcribed.
        content_index: Index of the content within the item.
        transcript: The complete transcription text.
    """

    type: Literal["conversation.item.input_audio_transcription.completed"]
    item_id: str
    content_index: int
    transcript: str
578
+
579
+
580
class ConversationItemInputAudioTranscriptionFailed(ServerEvent):
    """Server event reporting that input audio transcription has failed.

    Parameters:
        type: Event type, always "conversation.item.input_audio_transcription.failed".
        item_id: ID of the conversation item whose transcription failed.
        content_index: Index of the content within the item.
        error: Details of the transcription failure.
    """

    type: Literal["conversation.item.input_audio_transcription.failed"]
    item_id: str
    content_index: int
    error: RealtimeError
594
+
595
+
596
class ConversationItemTruncated(ServerEvent):
    """Server event reporting that a conversation item was truncated.

    Parameters:
        type: Event type, always "conversation.item.truncated".
        item_id: ID of the truncated conversation item.
        content_index: Index of the content within the item.
        audio_end_ms: End time of the truncated audio, in milliseconds.
    """

    type: Literal["conversation.item.truncated"]
    item_id: str
    content_index: int
    audio_end_ms: int
610
+
611
+
612
class ConversationItemDeleted(ServerEvent):
    """Server event reporting that a conversation item was deleted.

    Parameters:
        type: Event type, always "conversation.item.deleted".
        item_id: ID of the deleted conversation item.
    """

    type: Literal["conversation.item.deleted"]
    item_id: str
622
+
623
+
624
class ConversationItemRetrieved(ServerEvent):
    """Server event carrying a retrieved conversation item.

    Parameters:
        type: Event type, always "conversation.item.retrieved".
        item: The conversation item that was retrieved.
    """

    type: Literal["conversation.item.retrieved"]
    item: ConversationItem
634
+
635
+
636
class ResponseCreated(ServerEvent):
    """Server event reporting that an assistant response was created.

    Parameters:
        type: Event type, always "response.created".
        response: The response object that was created.
    """

    type: Literal["response.created"]
    # Kept as a string annotation: "Response" is not defined above this class.
    response: "Response"
646
+
647
+
648
class ResponseDone(ServerEvent):
    """Server event reporting that an assistant response has finished.

    Parameters:
        type: Discriminator, fixed to "response.done".
        response: The response object in its completed form.
    """

    type: Literal["response.done"]
    # Quoted forward reference: ``Response`` is declared later in this module.
    response: "Response"
658
+
659
+
660
class ResponseOutputItemAdded(ServerEvent):
    """Server event reporting that a response gained a new output item.

    Parameters:
        type: Discriminator, fixed to "response.output_item.added".
        response_id: Identifier of the response the item belongs to.
        output_index: Position of the output item.
        item: The conversation item that was added.
    """

    type: Literal["response.output_item.added"]
    response_id: str
    output_index: int
    item: ConversationItem
674
+
675
+
676
class ResponseOutputItemDone(ServerEvent):
    """Server event reporting that an output item has finished.

    Parameters:
        type: Discriminator, fixed to "response.output_item.done".
        response_id: Identifier of the response the item belongs to.
        output_index: Position of the output item.
        item: The conversation item in its completed form.
    """

    type: Literal["response.output_item.done"]
    response_id: str
    output_index: int
    item: ConversationItem
690
+
691
+
692
class ResponseContentPartAdded(ServerEvent):
    """Server event reporting that a response gained a new content part.

    Parameters:
        type: Discriminator, fixed to "response.content_part.added".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        part: The content part that was added.
    """

    type: Literal["response.content_part.added"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    part: ItemContent
710
+
711
+
712
class ResponseContentPartDone(ServerEvent):
    """Server event reporting that a content part has finished.

    Parameters:
        type: Discriminator, fixed to "response.content_part.done".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        part: The content part in its completed form.
    """

    type: Literal["response.content_part.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    part: ItemContent
730
+
731
+
732
class ResponseTextDelta(ServerEvent):
    """Server event that carries one increment of response text.

    Parameters:
        type: Discriminator, fixed to "response.output_text.delta".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        delta: The incremental piece of text.
    """

    type: Literal["response.output_text.delta"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    delta: str
750
+
751
+
752
class ResponseTextDone(ServerEvent):
    """Server event reporting that the text content has finished.

    Parameters:
        type: Discriminator, fixed to "response.output_text.done".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        text: The text content in full.
    """

    type: Literal["response.output_text.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    text: str
770
+
771
+
772
class ResponseAudioTranscriptDelta(ServerEvent):
    """Server event that carries one increment of a response's audio transcript.

    Parameters:
        type: Discriminator, fixed to "response.output_audio_transcript.delta".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        delta: The incremental piece of transcript text.
    """

    type: Literal["response.output_audio_transcript.delta"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    delta: str
790
+
791
+
792
class ResponseAudioTranscriptDone(ServerEvent):
    """Server event reporting that the audio transcript has finished.

    Parameters:
        type: Discriminator, fixed to "response.output_audio_transcript.done".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        transcript: The transcript text in full.
    """

    type: Literal["response.output_audio_transcript.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    transcript: str
810
+
811
+
812
class ResponseAudioDelta(ServerEvent):
    """Server event that carries one increment of response audio.

    Parameters:
        type: Discriminator, fixed to "response.output_audio.delta".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
        delta: The incremental audio data, base64-encoded.
    """

    type: Literal["response.output_audio.delta"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
    delta: str  # audio payload as a base64 string
830
+
831
+
832
class ResponseAudioDone(ServerEvent):
    """Server event reporting that the audio content has finished.

    Parameters:
        type: Discriminator, fixed to "response.output_audio.done".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        content_index: Position of the content part.
    """

    type: Literal["response.output_audio.done"]
    response_id: str
    item_id: str
    output_index: int
    content_index: int
848
+
849
+
850
class ResponseFunctionCallArgumentsDelta(ServerEvent):
    """Server event that carries one increment of function-call arguments.

    Parameters:
        type: Discriminator, fixed to "response.function_call_arguments.delta".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        call_id: Identifier of the function call.
        delta: The incremental piece of the function arguments (JSON text).
    """

    type: Literal["response.function_call_arguments.delta"]
    response_id: str
    item_id: str
    output_index: int
    call_id: str
    delta: str
868
+
869
+
870
class ResponseFunctionCallArgumentsDone(ServerEvent):
    """Server event reporting that function-call arguments have finished.

    Parameters:
        type: Discriminator, fixed to "response.function_call_arguments.done".
        response_id: Identifier of the response.
        item_id: Identifier of the conversation item.
        output_index: Position of the output item.
        call_id: Identifier of the function call.
        arguments: The full function arguments, as a JSON string.
    """

    type: Literal["response.function_call_arguments.done"]
    response_id: str
    item_id: str
    output_index: int
    call_id: str
    arguments: str
888
+
889
+
890
class InputAudioBufferSpeechStarted(ServerEvent):
    """Server event reporting that speech began in the input audio buffer.

    Parameters:
        type: Discriminator, fixed to "input_audio_buffer.speech_started".
        audio_start_ms: Millisecond offset at which the speech starts.
        item_id: Identifier of the conversation item tied to this speech.
    """

    type: Literal["input_audio_buffer.speech_started"]
    audio_start_ms: int
    item_id: str
902
+
903
+
904
class InputAudioBufferSpeechStopped(ServerEvent):
    """Server event reporting that speech ended in the input audio buffer.

    Parameters:
        type: Discriminator, fixed to "input_audio_buffer.speech_stopped".
        audio_end_ms: Millisecond offset at which the speech ends.
        item_id: Identifier of the conversation item tied to this speech.
    """

    type: Literal["input_audio_buffer.speech_stopped"]
    audio_end_ms: int
    item_id: str
916
+
917
+
918
class InputAudioBufferCommitted(ServerEvent):
    """Server event reporting that the input audio buffer was committed.

    Parameters:
        type: Discriminator, fixed to "input_audio_buffer.committed".
        previous_item_id: Identifier of the preceding item; None when absent.
        item_id: Identifier of the conversation item that was committed.
    """

    type: Literal["input_audio_buffer.committed"]
    previous_item_id: Optional[str] = None
    item_id: str
930
+
931
+
932
class InputAudioBufferCleared(ServerEvent):
    """Server event reporting that the input audio buffer was cleared.

    Parameters:
        type: Discriminator, fixed to "input_audio_buffer.cleared".
    """

    type: Literal["input_audio_buffer.cleared"]
940
+
941
+
942
class ErrorEvent(ServerEvent):
    """Server event reporting that an error happened.

    Parameters:
        type: Discriminator, fixed to "error".
        error: Details describing the error.
    """

    type: Literal["error"]
    error: RealtimeError
952
+
953
+
954
class RateLimitsUpdated(ServerEvent):
    """Server event reporting a change in rate limits.

    Parameters:
        type: Discriminator, fixed to "rate_limits.updated".
        rate_limits: Rate limit entries, each kept as an untyped dictionary.
    """

    type: Literal["rate_limits.updated"]
    rate_limits: List[Dict[str, Any]]
964
+
965
+
966
class CachedTokensDetails(BaseModel):
    """Breakdown of cached token counts.

    Parameters:
        text_tokens: Count of cached text tokens. Defaults to 0.
        audio_tokens: Count of cached audio tokens. Defaults to 0.
    """

    text_tokens: Optional[int] = 0
    audio_tokens: Optional[int] = 0
976
+
977
+
978
class TokenDetails(BaseModel):
    """Detailed token usage information.

    Undeclared keys in the incoming payload are accepted rather than rejected
    (``extra="allow"``).

    Parameters:
        cached_tokens: Number of cached tokens used. Defaults to 0.
        text_tokens: Number of text tokens used. Defaults to 0.
        audio_tokens: Number of audio tokens used. Defaults to 0.
        cached_tokens_details: Detailed breakdown of cached tokens.
        image_tokens: Number of image tokens used (for input only).
    """

    # Pydantic v2 configuration. This replaces the deprecated nested
    # ``class Config`` form; a plain dict is used so no extra import is needed.
    model_config = {"extra": "allow"}

    cached_tokens: Optional[int] = 0
    text_tokens: Optional[int] = 0
    audio_tokens: Optional[int] = 0
    cached_tokens_details: Optional[CachedTokensDetails] = None
    image_tokens: Optional[int] = 0
999
+
1000
+
1001
class Usage(BaseModel):
    """Token accounting for a single response.

    Parameters:
        total_tokens: Overall token count.
        input_tokens: Token count on the input side.
        output_tokens: Token count on the output side.
        input_token_details: Per-category breakdown of the input tokens.
        output_token_details: Per-category breakdown of the output tokens.
    """

    total_tokens: int
    input_tokens: int
    output_tokens: int
    input_token_details: TokenDetails
    output_token_details: TokenDetails
1017
+
1018
+
1019
class Response(BaseModel):
    """A complete assistant response.

    Parameters:
        id: Unique identifier for the response.
        object: Object type, always "realtime.response".
        status: Current status of the response.
        status_details: Additional status information.
        output: List of conversation items in the response.
        conversation_id: Which conversation the response is added to.
        output_modalities: The set of modalities the model used to respond.
        max_output_tokens: Maximum number of output tokens used.
        audio: Audio configuration for the response.
        usage: Token usage statistics for the response.
        voice: The voice the model used to respond.
        temperature: Sampling temperature used for the response.
        output_audio_format: The format of output audio.
    """

    id: str
    object: Literal["realtime.response"]
    status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"]
    status_details: Any
    output: List[ConversationItem]
    # Documented above but previously undeclared, so the value was dropped
    # during validation. Optional so existing payloads still validate.
    conversation_id: Optional[str] = None
    output_modalities: Optional[List[Literal["text", "audio"]]] = None
    max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
    audio: Optional[AudioConfiguration] = None
    usage: Optional[Usage] = None
    voice: Optional[str] = None
    temperature: Optional[float] = None
    output_audio_format: Optional[str] = None
1050
+
1051
+
1052
# Lookup table from the wire-level "type" string of a server event to the
# model class used to validate it. Consulted by parse_server_event().
_server_event_types = {
    # Errors and session lifecycle
    "error": ErrorEvent,
    "session.created": SessionCreatedEvent,
    "session.updated": SessionUpdatedEvent,
    "conversation.created": ConversationCreated,
    # Input audio buffer
    "input_audio_buffer.committed": InputAudioBufferCommitted,
    "input_audio_buffer.cleared": InputAudioBufferCleared,
    "input_audio_buffer.speech_started": InputAudioBufferSpeechStarted,
    "input_audio_buffer.speech_stopped": InputAudioBufferSpeechStopped,
    # Conversation items
    "conversation.item.added": ConversationItemAdded,
    "conversation.item.done": ConversationItemDone,
    "conversation.item.input_audio_transcription.delta": ConversationItemInputAudioTranscriptionDelta,
    "conversation.item.input_audio_transcription.completed": ConversationItemInputAudioTranscriptionCompleted,
    "conversation.item.input_audio_transcription.failed": ConversationItemInputAudioTranscriptionFailed,
    "conversation.item.truncated": ConversationItemTruncated,
    "conversation.item.deleted": ConversationItemDeleted,
    "conversation.item.retrieved": ConversationItemRetrieved,
    # Responses and their streamed parts
    "response.created": ResponseCreated,
    "response.done": ResponseDone,
    "response.output_item.added": ResponseOutputItemAdded,
    "response.output_item.done": ResponseOutputItemDone,
    "response.content_part.added": ResponseContentPartAdded,
    "response.content_part.done": ResponseContentPartDone,
    "response.output_text.delta": ResponseTextDelta,
    "response.output_text.done": ResponseTextDone,
    "response.output_audio_transcript.delta": ResponseAudioTranscriptDelta,
    "response.output_audio_transcript.done": ResponseAudioTranscriptDone,
    "response.output_audio.delta": ResponseAudioDelta,
    "response.output_audio.done": ResponseAudioDone,
    "response.function_call_arguments.delta": ResponseFunctionCallArgumentsDelta,
    "response.function_call_arguments.done": ResponseFunctionCallArgumentsDone,
    # Rate limiting
    "rate_limits.updated": RateLimitsUpdated,
}
1085
+
1086
+
1087
def parse_server_event(str):
    """Parse a server event from JSON string.

    The payload is decoded as JSON, its "type" value is looked up in
    ``_server_event_types``, and the matching model class validates it.

    Args:
        str: JSON string containing the server event. The name shadows the
            builtin ``str`` inside this function; it is kept unchanged so
            existing keyword callers continue to work.

    Returns:
        Parsed server event object of the appropriate type.

    Raises:
        Exception: If the event type is unimplemented or parsing fails. The
            message is the underlying error text followed by the raw payload,
            and the underlying error is attached as ``__cause__``.
    """
    try:
        event = json.loads(str)
        event_type = event["type"]
        if event_type not in _server_event_types:
            raise Exception(f"Unimplemented server event type: {event_type}")
        return _server_event_types[event_type].model_validate(event)
    except Exception as e:
        # Re-raise with the raw payload appended for debugging, and chain the
        # original error explicitly so callers can inspect ``__cause__``.
        raise Exception(f"{e} \n\n{str}") from e