PraisonAI 2.0.61__cp313-cp313-manylinux_2_39_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of PraisonAI might be problematic. See the advisory for more details.

Files changed (89)
  1. praisonai/__init__.py +6 -0
  2. praisonai/__main__.py +10 -0
  3. praisonai/agents_generator.py +648 -0
  4. praisonai/api/call.py +292 -0
  5. praisonai/auto.py +238 -0
  6. praisonai/chainlit_ui.py +304 -0
  7. praisonai/cli.py +518 -0
  8. praisonai/deploy.py +138 -0
  9. praisonai/inbuilt_tools/__init__.py +24 -0
  10. praisonai/inbuilt_tools/autogen_tools.py +117 -0
  11. praisonai/inc/__init__.py +2 -0
  12. praisonai/inc/config.py +96 -0
  13. praisonai/inc/models.py +128 -0
  14. praisonai/public/android-chrome-192x192.png +0 -0
  15. praisonai/public/android-chrome-512x512.png +0 -0
  16. praisonai/public/apple-touch-icon.png +0 -0
  17. praisonai/public/fantasy.svg +3 -0
  18. praisonai/public/favicon-16x16.png +0 -0
  19. praisonai/public/favicon-32x32.png +0 -0
  20. praisonai/public/favicon.ico +0 -0
  21. praisonai/public/game.svg +3 -0
  22. praisonai/public/logo_dark.png +0 -0
  23. praisonai/public/logo_light.png +0 -0
  24. praisonai/public/movie.svg +3 -0
  25. praisonai/public/praison-ai-agents-architecture-dark.png +0 -0
  26. praisonai/public/praison-ai-agents-architecture.png +0 -0
  27. praisonai/public/thriller.svg +3 -0
  28. praisonai/setup/__init__.py +1 -0
  29. praisonai/setup/build.py +21 -0
  30. praisonai/setup/config.yaml +60 -0
  31. praisonai/setup/post_install.py +23 -0
  32. praisonai/setup/setup_conda_env.py +25 -0
  33. praisonai/setup/setup_conda_env.sh +72 -0
  34. praisonai/setup.py +16 -0
  35. praisonai/test.py +105 -0
  36. praisonai/train.py +276 -0
  37. praisonai/ui/README.md +21 -0
  38. praisonai/ui/agents.py +822 -0
  39. praisonai/ui/callbacks.py +57 -0
  40. praisonai/ui/chat.py +387 -0
  41. praisonai/ui/code.py +440 -0
  42. praisonai/ui/colab.py +474 -0
  43. praisonai/ui/colab_chainlit.py +81 -0
  44. praisonai/ui/components/aicoder.py +269 -0
  45. praisonai/ui/config/.chainlit/config.toml +120 -0
  46. praisonai/ui/config/.chainlit/translations/bn.json +231 -0
  47. praisonai/ui/config/.chainlit/translations/en-US.json +229 -0
  48. praisonai/ui/config/.chainlit/translations/gu.json +231 -0
  49. praisonai/ui/config/.chainlit/translations/he-IL.json +231 -0
  50. praisonai/ui/config/.chainlit/translations/hi.json +231 -0
  51. praisonai/ui/config/.chainlit/translations/kn.json +231 -0
  52. praisonai/ui/config/.chainlit/translations/ml.json +231 -0
  53. praisonai/ui/config/.chainlit/translations/mr.json +231 -0
  54. praisonai/ui/config/.chainlit/translations/ta.json +231 -0
  55. praisonai/ui/config/.chainlit/translations/te.json +231 -0
  56. praisonai/ui/config/.chainlit/translations/zh-CN.json +229 -0
  57. praisonai/ui/config/chainlit.md +1 -0
  58. praisonai/ui/config/translations/bn.json +231 -0
  59. praisonai/ui/config/translations/en-US.json +229 -0
  60. praisonai/ui/config/translations/gu.json +231 -0
  61. praisonai/ui/config/translations/he-IL.json +231 -0
  62. praisonai/ui/config/translations/hi.json +231 -0
  63. praisonai/ui/config/translations/kn.json +231 -0
  64. praisonai/ui/config/translations/ml.json +231 -0
  65. praisonai/ui/config/translations/mr.json +231 -0
  66. praisonai/ui/config/translations/ta.json +231 -0
  67. praisonai/ui/config/translations/te.json +231 -0
  68. praisonai/ui/config/translations/zh-CN.json +229 -0
  69. praisonai/ui/context.py +283 -0
  70. praisonai/ui/db.py +291 -0
  71. praisonai/ui/public/fantasy.svg +3 -0
  72. praisonai/ui/public/game.svg +3 -0
  73. praisonai/ui/public/logo_dark.png +0 -0
  74. praisonai/ui/public/logo_light.png +0 -0
  75. praisonai/ui/public/movie.svg +3 -0
  76. praisonai/ui/public/praison.css +3 -0
  77. praisonai/ui/public/thriller.svg +3 -0
  78. praisonai/ui/realtime.py +476 -0
  79. praisonai/ui/realtimeclient/__init__.py +653 -0
  80. praisonai/ui/realtimeclient/realtimedocs.txt +1484 -0
  81. praisonai/ui/realtimeclient/tools.py +236 -0
  82. praisonai/ui/sql_alchemy.py +707 -0
  83. praisonai/ui/tools.md +133 -0
  84. praisonai/version.py +1 -0
  85. praisonai-2.0.61.dist-info/LICENSE +20 -0
  86. praisonai-2.0.61.dist-info/METADATA +679 -0
  87. praisonai-2.0.61.dist-info/RECORD +89 -0
  88. praisonai-2.0.61.dist-info/WHEEL +4 -0
  89. praisonai-2.0.61.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,653 @@
1
+ # Derived from https://github.com/openai/openai-realtime-console. Will integrate with Chainlit when more mature.
2
+
3
+ import os
4
+ import asyncio
5
+ import inspect
6
+ import numpy as np
7
+ import json
8
+ import websockets
9
+ from datetime import datetime
10
+ from collections import defaultdict
11
+ import base64
12
+
13
+ from chainlit.logger import logger
14
+ from chainlit.config import config
15
+
16
+
17
def float_to_16bit_pcm(float32_array):
    """Convert float32 amplitude samples to 16-bit PCM.

    :param float32_array: numpy array of float32 amplitudes, nominally in [-1.0, 1.0]
    :return: numpy array of int16
    """
    # Clamp out-of-range samples first, then scale to the int16 range.
    clamped = np.clip(float32_array, -1, 1)
    return (clamped * 32767).astype(np.int16)
25
+
26
def base64_to_array_buffer(base64_string):
    """Decode a base64 string into a raw byte buffer.

    :param base64_string: base64 encoded string
    :return: numpy array of uint8 viewing the decoded bytes
    """
    raw_bytes = base64.b64decode(base64_string)
    # frombuffer avoids copying the decoded bytes into a new buffer.
    return np.frombuffer(raw_bytes, dtype=np.uint8)
34
+
35
def array_buffer_to_base64(array_buffer):
    """
    Converts a numpy array buffer to a base64 string.

    float32 input is first converted to 16-bit PCM (the wire format used by
    the realtime audio API); any other dtype is encoded from its raw bytes.
    The original had two identical `elif`/`else` branches and passed the
    float32 result to b64encode via the implicit buffer protocol; encoding
    is now uniformly done from `.tobytes()`, which also handles
    non-contiguous views correctly.

    :param array_buffer: numpy array
    :return: base64 encoded string
    """
    if array_buffer.dtype == np.float32:
        # Down-convert amplitudes to int16 PCM before encoding.
        array_buffer = float_to_16bit_pcm(array_buffer)
    # tobytes() yields a contiguous copy, so any dtype/layout encodes safely.
    return base64.b64encode(array_buffer.tobytes()).decode('utf-8')
49
+
50
def merge_int16_arrays(left, right):
    """Concatenate two int16 numpy arrays.

    :param left: numpy array of int16
    :param right: numpy array of int16
    :return: merged numpy array of int16
    :raises ValueError: if either operand is not an int16 numpy array
    """
    for operand in (left, right):
        if not (isinstance(operand, np.ndarray) and operand.dtype == np.int16):
            raise ValueError("Both items must be numpy arrays of int16")
    return np.concatenate((left, right))
61
+
62
+
63
class RealtimeEventHandler:
    """Minimal pub/sub dispatcher supporting sync and coroutine handlers."""

    def __init__(self):
        # event name -> list of handlers (callables or coroutine functions)
        self.event_handlers = defaultdict(list)

    def on(self, event_name, handler):
        """Register *handler* to run on every dispatch of *event_name*."""
        self.event_handlers[event_name].append(handler)

    def clear_event_handlers(self):
        """Drop all registered handlers."""
        self.event_handlers = defaultdict(list)

    def dispatch(self, event_name, event):
        """Invoke every handler registered for *event_name* with *event*.

        Coroutine handlers are scheduled as tasks on the running loop;
        plain callables are invoked synchronously.
        """
        # Iterate a snapshot so handlers may unregister themselves
        # (e.g. the one-shot handler in wait_for_next) without
        # disturbing the iteration.
        for handler in list(self.event_handlers[event_name]):
            if inspect.iscoroutinefunction(handler):
                asyncio.create_task(handler(event))
            else:
                handler(event)

    async def wait_for_next(self, event_name):
        """Await and return the next *event_name* event.

        Fix over the original: the temporary handler unregisters itself
        after the first delivery, so repeated calls no longer accumulate
        dead handlers forever.
        """
        future = asyncio.Future()

        def handler(event):
            if not future.done():
                future.set_result(event)
            # One-shot: remove ourselves after the first delivery.
            try:
                self.event_handlers[event_name].remove(handler)
            except ValueError:
                pass

        self.on(event_name, handler)
        return await future
89
+
90
+
91
class RealtimeAPI(RealtimeEventHandler):
    """Thin async wrapper over the OpenAI realtime websocket endpoint.

    Every incoming server message is re-emitted as ``server.<type>`` and
    ``server.*`` events; every outgoing message as ``client.<type>`` and
    ``client.*``.
    """

    def __init__(self, url=None, api_key=None):
        super().__init__()
        self.default_url = 'wss://api.openai.com/v1/realtime'
        self.url = url or self.default_url
        # Falls back to the environment when no key is supplied explicitly.
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.ws = None  # populated by connect()

    def is_connected(self):
        """Return True while a websocket handle is held."""
        return self.ws is not None

    def log(self, *args):
        """Debug-log with a UTC timestamp prefix."""
        logger.debug(f"[Websocket/{datetime.utcnow().isoformat()}]", *args)

    async def connect(self, model='gpt-4o-realtime-preview-2024-10-01'):
        """Open the websocket for *model* and start the receive loop."""
        if self.is_connected():
            raise Exception("Already connected")
        auth_headers = {
            'Authorization': f'Bearer {self.api_key}',
            'OpenAI-Beta': 'realtime=v1'
        }
        self.ws = await websockets.connect(f"{self.url}?model={model}", extra_headers=auth_headers)
        self.log(f"Connected to {self.url}")
        # Background task pumps server messages into dispatched events.
        asyncio.create_task(self._receive_messages())

    async def _receive_messages(self):
        """Dispatch each incoming message until the socket closes."""
        async for message in self.ws:
            event = json.loads(message)
            if event['type'] == "error":
                logger.error("ERROR", event)
            self.log("received:", event)
            self.dispatch(f"server.{event['type']}", event)
            self.dispatch("server.*", event)

    async def send(self, event_name, data=None):
        """Send *event_name* with dict payload *data* to the server.

        :raises Exception: when not connected, or *data* is not a dict.
        """
        if not self.is_connected():
            raise Exception("RealtimeAPI is not connected")
        payload = data or {}
        if not isinstance(payload, dict):
            raise Exception("data must be a dictionary")
        event = {"event_id": self._generate_id("evt_"), "type": event_name, **payload}
        self.dispatch(f"client.{event_name}", event)
        self.dispatch("client.*", event)
        self.log("sent:", event)
        await self.ws.send(json.dumps(event))

    def _generate_id(self, prefix):
        """Millisecond-timestamp id such as ``evt_1700000000000``."""
        return f"{prefix}{int(datetime.utcnow().timestamp() * 1000)}"

    async def disconnect(self):
        """Close and forget the websocket, if one is open."""
        if self.ws:
            await self.ws.close()
            self.ws = None
            self.log(f"Disconnected from {self.url}")
148
+
149
class RealtimeConversation:
    """Client-side mirror of the server's realtime conversation state.

    Server events are applied via process_event(); items and responses are
    tracked in lookup dicts, and audio/transcripts that arrive before their
    owning item are queued and merged when the item is created. Each
    processor returns an ``(item, delta)`` pair; either may be None.
    """

    # Audio sample rate, used to convert millisecond offsets into sample
    # indices when slicing audio buffers.
    default_frequency = config.features.audio.sample_rate

    # Dispatch table: server event type -> bound processor. Note the
    # 'speech_stopped' entry takes an extra argument (the raw input audio
    # buffer) forwarded through process_event(*args).
    EventProcessors = {
        'conversation.item.created': lambda self, event: self._process_item_created(event),
        'conversation.item.truncated': lambda self, event: self._process_item_truncated(event),
        'conversation.item.deleted': lambda self, event: self._process_item_deleted(event),
        'conversation.item.input_audio_transcription.completed': lambda self, event: self._process_input_audio_transcription_completed(event),
        'input_audio_buffer.speech_started': lambda self, event: self._process_speech_started(event),
        'input_audio_buffer.speech_stopped': lambda self, event, input_audio_buffer: self._process_speech_stopped(event, input_audio_buffer),
        'response.created': lambda self, event: self._process_response_created(event),
        'response.output_item.added': lambda self, event: self._process_output_item_added(event),
        'response.output_item.done': lambda self, event: self._process_output_item_done(event),
        'response.content_part.added': lambda self, event: self._process_content_part_added(event),
        'response.audio_transcript.delta': lambda self, event: self._process_audio_transcript_delta(event),
        'response.audio.delta': lambda self, event: self._process_audio_delta(event),
        'response.text.delta': lambda self, event: self._process_text_delta(event),
        'response.function_call_arguments.delta': lambda self, event: self._process_function_call_arguments_delta(event),
    }

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset all conversation state to empty."""
        self.item_lookup = {}      # item id -> item dict (same objects as in self.items)
        self.items = []            # items in creation order
        self.response_lookup = {}  # response id -> response dict
        self.responses = []        # responses in creation order
        self.queued_speech_items = {}      # item id -> speech info that arrived before its item
        self.queued_transcript_items = {}  # item id -> transcript that arrived before its item
        self.queued_input_audio = None     # committed input audio for the next user item

    def queue_input_audio(self, input_audio):
        """Stash committed input audio; attached to the next user message item."""
        self.queued_input_audio = input_audio

    def process_event(self, event, *args):
        """Apply *event* to local state.

        :param event: server event dict with a 'type' key
        :param args: extra positional args some processors need (input audio buffer)
        :return: (item, delta) from the matching processor
        :raises Exception: if the event type has no registered processor
        """
        event_processor = self.EventProcessors.get(event['type'])
        if not event_processor:
            raise Exception(f"Missing conversation event processor for {event['type']}")
        return event_processor(self, event, *args)

    def get_item(self, id):
        """Return the item with this id, or None when unknown."""
        return self.item_lookup.get(id)

    def get_items(self):
        """Return a shallow copy of the item list."""
        return self.items[:]

    def _process_item_created(self, event):
        """Register a new item and merge any queued audio/transcript into it."""
        item = event['item']
        # Work on a copy so the raw event payload stays untouched.
        new_item = item.copy()
        if new_item['id'] not in self.item_lookup:
            self.item_lookup[new_item['id']] = new_item
            self.items.append(new_item)
        new_item['formatted'] = {
            'audio': [],
            'text': '',
            'transcript': ''
        }
        # Merge speech audio that arrived before this item existed.
        if new_item['id'] in self.queued_speech_items:
            new_item['formatted']['audio'] = self.queued_speech_items[new_item['id']]['audio']
            del self.queued_speech_items[new_item['id']]
        if 'content' in new_item:
            text_content = [c for c in new_item['content'] if c['type'] in ['text', 'input_text']]
            for content in text_content:
                new_item['formatted']['text'] += content['text']
        # Merge a transcript that arrived before this item existed.
        if new_item['id'] in self.queued_transcript_items:
            new_item['formatted']['transcript'] = self.queued_transcript_items[new_item['id']]['transcript']
            del self.queued_transcript_items[new_item['id']]
        if new_item['type'] == 'message':
            if new_item['role'] == 'user':
                # User messages are complete on arrival; attach pending input audio.
                new_item['status'] = 'completed'
                if self.queued_input_audio:
                    new_item['formatted']['audio'] = self.queued_input_audio
                    self.queued_input_audio = None
            else:
                new_item['status'] = 'in_progress'
        elif new_item['type'] == 'function_call':
            # Arguments stream in later via function_call_arguments.delta.
            new_item['formatted']['tool'] = {
                'type': 'function',
                'name': new_item['name'],
                'call_id': new_item['call_id'],
                'arguments': ''
            }
            new_item['status'] = 'in_progress'
        elif new_item['type'] == 'function_call_output':
            new_item['status'] = 'completed'
            new_item['formatted']['output'] = new_item['output']
        return new_item, None

    def _process_item_truncated(self, event):
        """Cut an item's audio at audio_end_ms and drop its transcript."""
        item_id = event['item_id']
        audio_end_ms = event['audio_end_ms']
        item = self.item_lookup.get(item_id)
        if not item:
            raise Exception(f'item.truncated: Item "{item_id}" not found')
        # Convert the millisecond offset to a sample index.
        end_index = (audio_end_ms * self.default_frequency) // 1000
        item['formatted']['transcript'] = ''
        item['formatted']['audio'] = item['formatted']['audio'][:end_index]
        return item, None

    def _process_item_deleted(self, event):
        """Remove an item from both the lookup and the ordered list."""
        item_id = event['item_id']
        item = self.item_lookup.get(item_id)
        if not item:
            raise Exception(f'item.deleted: Item "{item_id}" not found')
        del self.item_lookup[item['id']]
        self.items.remove(item)
        return item, None

    def _process_input_audio_transcription_completed(self, event):
        """Attach a finished input transcript, queuing it if the item is unknown."""
        item_id = event['item_id']
        content_index = event['content_index']
        transcript = event['transcript']
        # Use a single space for empty transcripts so downstream code sees content.
        formatted_transcript = transcript or ' '
        item = self.item_lookup.get(item_id)
        if not item:
            # Transcription can finish before the item.created event arrives.
            self.queued_transcript_items[item_id] = {'transcript': formatted_transcript}
            return None, None
        item['content'][content_index]['transcript'] = transcript
        item['formatted']['transcript'] = formatted_transcript
        return item, {'transcript': transcript}

    def _process_speech_started(self, event):
        """Record the start offset of a detected speech segment."""
        item_id = event['item_id']
        audio_start_ms = event['audio_start_ms']
        self.queued_speech_items[item_id] = {'audio_start_ms': audio_start_ms}
        return None, None

    def _process_speech_stopped(self, event, input_audio_buffer):
        """Record the end offset and slice the speech audio out of the input buffer."""
        item_id = event['item_id']
        audio_end_ms = event['audio_end_ms']
        speech = self.queued_speech_items[item_id]
        speech['audio_end_ms'] = audio_end_ms
        if input_audio_buffer:
            # Millisecond offsets -> sample indices into the raw buffer.
            start_index = (speech['audio_start_ms'] * self.default_frequency) // 1000
            end_index = (speech['audio_end_ms'] * self.default_frequency) // 1000
            speech['audio'] = input_audio_buffer[start_index:end_index]
        return None, None

    def _process_response_created(self, event):
        """Register a newly created response."""
        response = event['response']
        if response['id'] not in self.response_lookup:
            self.response_lookup[response['id']] = response
            self.responses.append(response)
        return None, None

    def _process_output_item_added(self, event):
        """Append an output item id to its parent response."""
        response_id = event['response_id']
        item = event['item']
        response = self.response_lookup.get(response_id)
        if not response:
            raise Exception(f'response.output_item.added: Response "{response_id}" not found')
        response['output'].append(item['id'])
        return None, None

    def _process_output_item_done(self, event):
        """Copy the final status onto the locally tracked item."""
        item = event['item']
        if not item:
            raise Exception('response.output_item.done: Missing "item"')
        found_item = self.item_lookup.get(item['id'])
        if not found_item:
            raise Exception(f'response.output_item.done: Item "{item["id"]}" not found')
        found_item['status'] = item['status']
        return found_item, None

    def _process_content_part_added(self, event):
        """Append a new content part to an existing item."""
        item_id = event['item_id']
        part = event['part']
        item = self.item_lookup.get(item_id)
        if not item:
            raise Exception(f'response.content_part.added: Item "{item_id}" not found')
        item['content'].append(part)
        return item, None

    def _process_audio_transcript_delta(self, event):
        """Append a transcript fragment to both raw content and formatted text."""
        item_id = event['item_id']
        content_index = event['content_index']
        delta = event['delta']
        item = self.item_lookup.get(item_id)
        if not item:
            raise Exception(f'response.audio_transcript.delta: Item "{item_id}" not found')
        item['content'][content_index]['transcript'] += delta
        item['formatted']['transcript'] += delta
        return item, {'transcript': delta}

    def _process_audio_delta(self, event):
        """Decode an audio fragment; the item's own buffer is not yet merged."""
        item_id = event['item_id']
        content_index = event['content_index']
        delta = event['delta']
        item = self.item_lookup.get(item_id)
        if not item:
            # Audio can trail a deleted/unknown item; drop it quietly.
            logger.debug(f'response.audio.delta: Item "{item_id}" not found')
            return None, None
        array_buffer = base64_to_array_buffer(delta)
        append_values = array_buffer.tobytes()
        # TODO: make it work
        # item['formatted']['audio'] = merge_int16_arrays(item['formatted']['audio'], append_values)
        return item, {'audio': append_values}

    def _process_text_delta(self, event):
        """Append a text fragment to both raw content and formatted text."""
        item_id = event['item_id']
        content_index = event['content_index']
        delta = event['delta']
        item = self.item_lookup.get(item_id)
        if not item:
            raise Exception(f'response.text.delta: Item "{item_id}" not found')
        item['content'][content_index]['text'] += delta
        item['formatted']['text'] += delta
        return item, {'text': delta}

    def _process_function_call_arguments_delta(self, event):
        """Append an arguments fragment to a streaming function call."""
        item_id = event['item_id']
        delta = event['delta']
        item = self.item_lookup.get(item_id)
        if not item:
            raise Exception(f'response.function_call_arguments.delta: Item "{item_id}" not found')
        item['arguments'] += delta
        item['formatted']['tool']['arguments'] += delta
        return item, {'arguments': delta}
368
+
369
+
370
class RealtimeClient(RealtimeEventHandler):
    """High-level realtime client tying RealtimeAPI to RealtimeConversation.

    Tracks session config and registered tools, relays server events into
    the local conversation state, and re-emits higher-level events such as
    ``conversation.updated``, ``conversation.item.appended`` and
    ``conversation.item.completed``.
    """

    def __init__(self, url=None, api_key=None):
        super().__init__()
        self.default_session_config = {
            "modalities": ["text", "audio"],
            "instructions": "System settings:\nTool use: enabled.\n\nInstructions:\n- You are an artificial intelligence agent responsible for helping test realtime voice capabilities\n- Please make sure to respond with a helpful voice via audio\n- Be kind, helpful, and curteous\n- It is okay to ask the user questions\n- Use tools and functions you have available liberally, it is part of the training apparatus\n- Be open to exploration and conversation\n- Remember: this is just for fun and testing!\n\nPersonality:\n- Be upbeat and genuine\n- Try speaking quickly as if excited\n",
            "voice": "shimmer",
            "input_audio_format": "pcm16",
            "output_audio_format": "pcm16",
            "input_audio_transcription": { "model": 'whisper-1' },
            "turn_detection": { "type": 'server_vad' },
            "tools": [],
            "tool_choice": "auto",
            "temperature": 0.8,
            "max_response_output_tokens": 4096,
        }
        self.session_config = {}
        self.transcription_models = [{"model": "whisper-1"}]
        self.default_server_vad_config = {
            "type": "server_vad",
            "threshold": 0.5,
            "prefix_padding_ms": 300,
            "silence_duration_ms": 200,
        }
        self.realtime = RealtimeAPI(url, api_key)
        self.conversation = RealtimeConversation()
        self._reset_config()
        self._add_api_event_handlers()

    def _reset_config(self):
        """Restore session config, tool registry and input buffer to defaults."""
        self.session_created = False
        self.tools = {}
        self.session_config = self.default_session_config.copy()
        self.input_audio_buffer = bytearray()
        return True

    def _add_api_event_handlers(self):
        """Wire RealtimeAPI events into conversation processing and logging."""
        self.realtime.on("client.*", self._log_event)
        self.realtime.on("server.*", self._log_event)
        self.realtime.on("server.session.created", self._on_session_created)
        self.realtime.on("server.response.created", self._process_event)
        self.realtime.on("server.response.output_item.added", self._process_event)
        self.realtime.on("server.response.content_part.added", self._process_event)
        self.realtime.on("server.input_audio_buffer.speech_started", self._on_speech_started)
        self.realtime.on("server.input_audio_buffer.speech_stopped", self._on_speech_stopped)
        self.realtime.on("server.conversation.item.created", self._on_item_created)
        self.realtime.on("server.conversation.item.truncated", self._process_event)
        self.realtime.on("server.conversation.item.deleted", self._process_event)
        self.realtime.on("server.conversation.item.input_audio_transcription.completed", self._process_event)
        self.realtime.on("server.response.audio_transcript.delta", self._process_event)
        self.realtime.on("server.response.audio.delta", self._process_event)
        self.realtime.on("server.response.text.delta", self._process_event)
        self.realtime.on("server.response.function_call_arguments.delta", self._process_event)
        self.realtime.on("server.response.output_item.done", self._on_output_item_done)

    def _log_event(self, event):
        """Re-emit any raw client/server event as a timestamped 'realtime.event'."""
        realtime_event = {
            "time": datetime.utcnow().isoformat(),
            "source": "client" if event["type"].startswith("client.") else "server",
            "event": event,
        }
        self.dispatch("realtime.event", realtime_event)

    def _on_session_created(self, event):
        """Mark the session as established."""
        print(f"Session created: {event}")
        logger.debug(f"Session created: {event}")
        self.session_created = True

    def _process_event(self, event, *args):
        """Feed an event into the conversation; emit 'conversation.updated' on change."""
        item, delta = self.conversation.process_event(event, *args)
        if item:
            self.dispatch("conversation.updated", {"item": item, "delta": delta})
        return item, delta

    def _on_speech_started(self, event):
        """Record speech start and signal an interruption to listeners."""
        self._process_event(event)
        self.dispatch("conversation.interrupted", event)

    def _on_speech_stopped(self, event):
        """Record speech end, slicing audio out of the local input buffer."""
        self._process_event(event, self.input_audio_buffer)

    def _on_item_created(self, event):
        """Announce a newly appended item; mark user items completed immediately."""
        item, delta = self._process_event(event)
        self.dispatch("conversation.item.appended", {"item": item})
        if item and item["status"] == "completed":
            self.dispatch("conversation.item.completed", {"item": item})

    async def _on_output_item_done(self, event):
        """Finalize an output item; invoke its tool handler when one is attached."""
        item, delta = self._process_event(event)
        if item and item["status"] == "completed":
            self.dispatch("conversation.item.completed", {"item": item})
        if item and item.get("formatted", {}).get("tool"):
            await self._call_tool(item["formatted"]["tool"])

    async def _call_tool(self, tool):
        """Run a registered tool handler and send its (or an error) output back.

        A response is requested afterwards in both the success and error paths.
        """
        try:
            json_arguments = json.loads(tool["arguments"])
            tool_config = self.tools.get(tool["name"])
            if not tool_config:
                raise Exception(f'Tool "{tool["name"]}" has not been added')
            result = await tool_config["handler"](**json_arguments)
            await self.realtime.send("conversation.item.create", {
                "item": {
                    "type": "function_call_output",
                    "call_id": tool["call_id"],
                    "output": json.dumps(result),
                }
            })
        except Exception as e:
            # Surface the failure to the model instead of crashing the client.
            error_message = json.dumps({"error": str(e)})
            logger.error(f"Tool call error: {error_message}")
            await self.realtime.send("conversation.item.create", {
                "item": {
                    "type": "function_call_output",
                    "call_id": tool["call_id"],
                    "output": error_message,
                }
            })
        await self.create_response()

    def is_connected(self):
        """Return True while the underlying websocket is connected."""
        return self.realtime.is_connected()

    def reset(self):
        """Reset handlers and config.

        NOTE(review): self.disconnect() is a coroutine; calling it without
        awaiting does not actually close the socket. Left as-is to preserve
        the synchronous interface — confirm callers and fix separately.
        """
        self.disconnect()
        self.realtime.clear_event_handlers()
        self._reset_config()
        self._add_api_event_handlers()
        return True

    async def connect(self):
        """Connect the underlying API and push the current session config."""
        if self.is_connected():
            raise Exception("Already connected, use .disconnect() first")
        await self.realtime.connect()
        await self.update_session()
        return True

    async def wait_for_session_created(self):
        """Poll until the server acknowledges session creation."""
        if not self.is_connected():
            raise Exception("Not connected, use .connect() first")
        while not self.session_created:
            await asyncio.sleep(0.001)
        return True

    async def disconnect(self):
        """Clear local conversation state and close the websocket."""
        self.session_created = False
        self.conversation.clear()
        if self.realtime.is_connected():
            await self.realtime.disconnect()

    def get_turn_detection_type(self):
        """Return the configured turn-detection type, or None."""
        return self.session_config.get("turn_detection", {}).get("type")

    async def add_tool(self, definition, handler):
        """Register a tool and push the updated session to the server.

        :param definition: tool schema; must contain a 'name'
        :param handler: async callable invoked with the parsed arguments
        :raises Exception: on missing name, duplicate name, or non-callable handler
        """
        if not definition.get("name"):
            raise Exception("Missing tool name in definition")
        name = definition["name"]
        if name in self.tools:
            raise Exception(f'Tool "{name}" already added. Please use .removeTool("{name}") before trying to add again.')
        if not callable(handler):
            raise Exception(f'Tool "{name}" handler must be a function')
        self.tools[name] = {"definition": definition, "handler": handler}
        await self.update_session()
        return self.tools[name]

    def remove_tool(self, name):
        """Unregister a tool by name; raises when it does not exist."""
        if name not in self.tools:
            raise Exception(f'Tool "{name}" does not exist, can not be removed.')
        del self.tools[name]
        return True

    async def delete_item(self, id):
        """Ask the server to delete a conversation item."""
        await self.realtime.send("conversation.item.delete", {"item_id": id})
        return True

    async def update_session(self, **kwargs):
        """Merge *kwargs* into the session config and sync it to the server.

        Registered tools are appended to any tools already present in the
        session config, all tagged with type "function".
        """
        self.session_config.update(kwargs)
        use_tools = [
            {**tool_definition, "type": "function"}
            for tool_definition in self.session_config.get("tools", [])
        ] + [
            {**self.tools[key]["definition"], "type": "function"}
            for key in self.tools
        ]
        session = {**self.session_config, "tools": use_tools}
        logger.debug(f"Updating session: {session}")
        if self.realtime.is_connected():
            await self.realtime.send("session.update", {"session": session})
        return True

    async def create_conversation_item(self, item):
        """Send a pre-built conversation item to the server."""
        await self.realtime.send("conversation.item.create", {
            "item": item
        })

    async def send_user_message_content(self, content=None):
        """Send a user message and request a response.

        Fix over the original: the default was a mutable list (``content=[]``),
        a classic shared-default pitfall; a None sentinel now stands in for it.

        :param content: list of content parts; 'input_audio' parts with raw
            bytes are base64-encoded in place before sending
        """
        if content is None:
            content = []
        if content:
            for c in content:
                if c["type"] == "input_audio":
                    if isinstance(c["audio"], (bytes, bytearray)):
                        c["audio"] = array_buffer_to_base64(c["audio"])
            await self.realtime.send("conversation.item.create", {
                "item": {
                    "type": "message",
                    "role": "user",
                    "content": content,
                }
            })
        await self.create_response()
        return True

    async def append_input_audio(self, array_buffer):
        """Stream raw input audio to the server and mirror it locally."""
        if len(array_buffer) > 0:
            await self.realtime.send("input_audio_buffer.append", {
                "audio": array_buffer_to_base64(np.array(array_buffer)),
            })
            self.input_audio_buffer.extend(array_buffer)
        return True

    async def create_response(self):
        """Request a model response, committing buffered audio when VAD is off."""
        if self.get_turn_detection_type() is None and len(self.input_audio_buffer) > 0:
            await self.realtime.send("input_audio_buffer.commit")
            self.conversation.queue_input_audio(self.input_audio_buffer)
            self.input_audio_buffer = bytearray()
        await self.realtime.send("response.create")
        return True

    async def cancel_response(self, id=None, sample_count=0):
        """Cancel the in-flight response, optionally truncating an item's audio.

        :param id: assistant message item to truncate; when falsy, only cancels
        :param sample_count: number of samples already played, used to compute
            the truncation point in milliseconds
        :return: {"item": item-or-None}
        """
        if not id:
            await self.realtime.send("response.cancel")
            return {"item": None}
        else:
            item = self.conversation.get_item(id)
            if not item:
                raise Exception(f'Could not find item "{id}"')
            if item["type"] != "message":
                raise Exception('Can only cancelResponse messages with type "message"')
            if item["role"] != "assistant":
                raise Exception('Can only cancelResponse messages with role "assistant"')
            await self.realtime.send("response.cancel")
            audio_index = next((i for i, c in enumerate(item["content"]) if c["type"] == "audio"), -1)
            if audio_index == -1:
                raise Exception("Could not find audio on item to cancel")
            await self.realtime.send("conversation.item.truncate", {
                "item_id": id,
                "content_index": audio_index,
                "audio_end_ms": int((sample_count / self.conversation.default_frequency) * 1000),
            })
            return {"item": item}

    async def wait_for_next_item(self):
        """Block until the next item is appended; return it."""
        event = await self.wait_for_next("conversation.item.appended")
        return {"item": event["item"]}

    async def wait_for_next_completed_item(self):
        """Block until the next item completes; return it."""
        event = await self.wait_for_next("conversation.item.completed")
        return {"item": event["item"]}

    async def _send_chainlit_message(self, item):
        """Render a conversation item into the Chainlit UI.

        Function-call outputs are only logged; user/assistant messages are
        sent as Chainlit messages when they carry text or a transcript.
        """
        # Imported lazily so the client works without a Chainlit session.
        import chainlit as cl

        # Debug logging
        logger.debug(f"Received item structure: {json.dumps({k: type(v).__name__ for k, v in item.items()}, indent=2)}")

        if "type" in item and item["type"] == "function_call_output":
            # Don't send function call outputs directly to Chainlit
            logger.debug(f"Function call output received: {item.get('output', '')}")
        elif "role" in item:
            if item["role"] == "user":
                content = item.get("formatted", {}).get("text", "") or item.get("formatted", {}).get("transcript", "")
                if content:
                    await cl.Message(content=content, author="User").send()
            elif item["role"] == "assistant":
                content = item.get("formatted", {}).get("text", "") or item.get("formatted", {}).get("transcript", "")
                if content:
                    await cl.Message(content=content, author="AI").send()
            else:
                logger.warning(f"Unhandled role: {item['role']}")
        else:
            # Handle items without a 'role' or 'type'
            logger.debug(f"Unhandled item type:\n{json.dumps(item, indent=2)}")

        # Additional debug logging
        logger.debug(f"Processed Chainlit message for item: {item.get('id', 'unknown')}")