dv-pipecat-ai 0.0.85.dev698__py3-none-any.whl → 0.0.85.dev814__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (45)
  1. {dv_pipecat_ai-0.0.85.dev698.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/METADATA +23 -18
  2. {dv_pipecat_ai-0.0.85.dev698.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/RECORD +45 -43
  3. pipecat/adapters/services/aws_nova_sonic_adapter.py +116 -6
  4. pipecat/pipeline/runner.py +6 -2
  5. pipecat/pipeline/task.py +40 -55
  6. pipecat/processors/aggregators/llm_context.py +40 -2
  7. pipecat/processors/frameworks/rtvi.py +1 -0
  8. pipecat/runner/daily.py +59 -20
  9. pipecat/runner/run.py +149 -67
  10. pipecat/runner/types.py +5 -5
  11. pipecat/services/assemblyai/models.py +6 -0
  12. pipecat/services/assemblyai/stt.py +13 -5
  13. pipecat/services/asyncai/tts.py +3 -0
  14. pipecat/services/aws/llm.py +33 -16
  15. pipecat/services/aws/nova_sonic/context.py +69 -0
  16. pipecat/services/aws/nova_sonic/llm.py +199 -89
  17. pipecat/services/aws/stt.py +2 -0
  18. pipecat/services/aws_nova_sonic/context.py +8 -12
  19. pipecat/services/cartesia/stt.py +77 -70
  20. pipecat/services/cartesia/tts.py +3 -1
  21. pipecat/services/deepgram/flux/stt.py +4 -0
  22. pipecat/services/elevenlabs/tts.py +82 -41
  23. pipecat/services/fish/tts.py +3 -0
  24. pipecat/services/google/stt.py +4 -0
  25. pipecat/services/lmnt/tts.py +2 -0
  26. pipecat/services/neuphonic/tts.py +3 -0
  27. pipecat/services/openai/tts.py +37 -6
  28. pipecat/services/piper/tts.py +7 -9
  29. pipecat/services/playht/tts.py +3 -0
  30. pipecat/services/rime/tts.py +9 -8
  31. pipecat/services/riva/stt.py +3 -1
  32. pipecat/services/salesforce/__init__.py +9 -0
  33. pipecat/services/salesforce/llm.py +465 -0
  34. pipecat/services/sarvam/tts.py +87 -10
  35. pipecat/services/speechmatics/stt.py +3 -1
  36. pipecat/services/stt_service.py +23 -10
  37. pipecat/services/tts_service.py +64 -13
  38. pipecat/transports/base_input.py +3 -0
  39. pipecat/transports/base_output.py +71 -77
  40. pipecat/transports/smallwebrtc/connection.py +5 -0
  41. pipecat/transports/smallwebrtc/request_handler.py +42 -0
  42. pipecat/utils/string.py +1 -0
  43. {dv_pipecat_ai-0.0.85.dev698.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/WHEEL +0 -0
  44. {dv_pipecat_ai-0.0.85.dev698.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/licenses/LICENSE +0 -0
  45. {dv_pipecat_ai-0.0.85.dev698.dist-info → dv_pipecat_ai-0.0.85.dev814.dist-info}/top_level.txt +0 -0
pipecat/services/aws/nova_sonic/llm.py

@@ -25,7 +25,7 @@ from loguru import logger
  from pydantic import BaseModel, Field

  from pipecat.adapters.schemas.tools_schema import ToolsSchema
- from pipecat.adapters.services.aws_nova_sonic_adapter import AWSNovaSonicLLMAdapter
+ from pipecat.adapters.services.aws_nova_sonic_adapter import AWSNovaSonicLLMAdapter, Role
  from pipecat.frames.frames import (
  BotStoppedSpeakingFrame,
  CancelFrame,
@@ -33,35 +33,30 @@ from pipecat.frames.frames import (
  Frame,
  FunctionCallFromLLM,
  InputAudioRawFrame,
- InterimTranscriptionFrame,
+ InterruptionFrame,
  LLMContextFrame,
  LLMFullResponseEndFrame,
  LLMFullResponseStartFrame,
- LLMTextFrame,
  StartFrame,
  TranscriptionFrame,
  TTSAudioRawFrame,
  TTSStartedFrame,
  TTSStoppedFrame,
  TTSTextFrame,
+ UserStartedSpeakingFrame,
+ UserStoppedSpeakingFrame,
  )
+ from pipecat.processors.aggregators.llm_context import LLMContext
  from pipecat.processors.aggregators.llm_response import (
  LLMAssistantAggregatorParams,
  LLMUserAggregatorParams,
  )
+ from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
  from pipecat.processors.aggregators.openai_llm_context import (
  OpenAILLMContext,
  OpenAILLMContextFrame,
  )
  from pipecat.processors.frame_processor import FrameDirection
- from pipecat.services.aws.nova_sonic.context import (
- AWSNovaSonicAssistantContextAggregator,
- AWSNovaSonicContextAggregatorPair,
- AWSNovaSonicLLMContext,
- AWSNovaSonicUserContextAggregator,
- Role,
- )
- from pipecat.services.aws.nova_sonic.frames import AWSNovaSonicFunctionCallResultFrame
  from pipecat.services.llm_service import LLMService
  from pipecat.utils.time import time_now_iso8601

@@ -217,6 +212,11 @@ class AWSNovaSonicLLMService(LLMService):
  system_instruction: System-level instruction for the model.
  tools: Available tools/functions for the model to use.
  send_transcription_frames: Whether to emit transcription frames.
+
+ .. deprecated:: 0.0.91
+ This parameter is deprecated and will be removed in a future version.
+ Transcription frames are always sent.
+
  **kwargs: Additional arguments passed to the parent LLMService.
  """
  super().__init__(**kwargs)
@@ -230,8 +230,20 @@
  self._params = params or Params()
  self._system_instruction = system_instruction
  self._tools = tools
- self._send_transcription_frames = send_transcription_frames
- self._context: Optional[AWSNovaSonicLLMContext] = None
+
+ if not send_transcription_frames:
+ import warnings
+
+ with warnings.catch_warnings():
+ warnings.simplefilter("always")
+ warnings.warn(
+ "`send_transcription_frames` is deprecated and will be removed in a future version. "
+ "Transcription frames are always sent.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
+ self._context: Optional[LLMContext] = None
  self._stream: Optional[
  DuplexEventStream[
  InvokeModelWithBidirectionalStreamInput,
@@ -244,12 +256,17 @@
  self._input_audio_content_name: Optional[str] = None
  self._content_being_received: Optional[CurrentContent] = None
  self._assistant_is_responding = False
+ self._may_need_repush_assistant_text = False
  self._ready_to_send_context = False
  self._handling_bot_stopped_speaking = False
  self._triggering_assistant_response = False
+ self._waiting_for_trigger_transcription = False
  self._disconnecting = False
  self._connected_time: Optional[float] = None
  self._wants_connection = False
+ self._user_text_buffer = ""
+ self._assistant_text_buffer = ""
+ self._completed_tool_calls = set()

  file_path = files("pipecat.services.aws.nova_sonic").joinpath("ready.wav")
  with wave.open(file_path.open("rb"), "rb") as wav_file:
@@ -302,12 +319,12 @@
  logger.debug("Resetting conversation")
  await self._handle_bot_stopped_speaking(delay_to_catch_trailing_assistant_text=False)

- # Carry over previous context through disconnect
+ # Grab context to carry through disconnect/reconnect
  context = self._context
- await self._disconnect()
- self._context = context

+ await self._disconnect()
  await self._start_connecting()
+ await self._handle_context(context)

  #
  # frame processing
@@ -322,28 +339,35 @@
  """
  await super().process_frame(frame, direction)

- if isinstance(frame, OpenAILLMContextFrame):
- await self._handle_context(frame.context)
- elif isinstance(frame, LLMContextFrame):
- raise NotImplementedError(
- "Universal LLMContext is not yet supported for AWS Nova Sonic."
+ if isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
+ context = (
+ frame.context
+ if isinstance(frame, LLMContextFrame)
+ else LLMContext.from_openai_context(frame.context)
  )
+ await self._handle_context(context)
  elif isinstance(frame, InputAudioRawFrame):
  await self._handle_input_audio_frame(frame)
  elif isinstance(frame, BotStoppedSpeakingFrame):
  await self._handle_bot_stopped_speaking(delay_to_catch_trailing_assistant_text=True)
- elif isinstance(frame, AWSNovaSonicFunctionCallResultFrame):
- await self._handle_function_call_result(frame)
+ elif isinstance(frame, InterruptionFrame):
+ await self._handle_interruption_frame()

  await self.push_frame(frame, direction)

- async def _handle_context(self, context: OpenAILLMContext):
+ async def _handle_context(self, context: LLMContext):
+ if self._disconnecting:
+ return
+
  if not self._context:
- # We got our initial context - try to finish connecting
- self._context = AWSNovaSonicLLMContext.upgrade_to_nova_sonic(
- context, self._system_instruction
- )
+ # We got our initial context
+ # Try to finish connecting
+ self._context = context
  await self._finish_connecting_if_context_available()
+ else:
+ # We got an updated context
+ # Send results for any newly-completed function calls
+ await self._process_completed_function_calls(send_new_results=True)

  async def _handle_input_audio_frame(self, frame: InputAudioRawFrame):
  # Wait until we're done sending the assistant response trigger audio before sending audio
@@ -393,9 +417,9 @@
  else:
  await finalize_assistant_response()

- async def _handle_function_call_result(self, frame: AWSNovaSonicFunctionCallResultFrame):
- result = frame.result_frame
- await self._send_tool_result(tool_call_id=result.tool_call_id, result=result.result)
+ async def _handle_interruption_frame(self):
+ if self._assistant_is_responding:
+ self._may_need_repush_assistant_text = True

  #
  # LLM communication: lifecycle
@@ -431,6 +455,17 @@
  logger.error(f"{self} initialization error: {e}")
  await self._disconnect()

+ async def _process_completed_function_calls(self, send_new_results: bool):
+ # Check for set of completed function calls in the context
+ for message in self._context.get_messages():
+ if message.get("role") and message.get("content") != "IN_PROGRESS":
+ tool_call_id = message.get("tool_call_id")
+ if tool_call_id and tool_call_id not in self._completed_tool_calls:
+ # Found a newly-completed function call - send the result to the service
+ if send_new_results:
+ await self._send_tool_result(tool_call_id, message.get("content"))
+ self._completed_tool_calls.add(tool_call_id)
+
  async def _finish_connecting_if_context_available(self):
  # We can only finish connecting once we've gotten our initial context and we're ready to
  # send it
@@ -439,30 +474,38 @@

  logger.info("Finishing connecting (setting up session)...")

+ # Initialize our bookkeeping of already-completed tool calls in the
+ # context
+ await self._process_completed_function_calls(send_new_results=False)
+
  # Read context
- history = self._context.get_messages_for_initializing_history()
+ adapter: AWSNovaSonicLLMAdapter = self.get_llm_adapter()
+ llm_connection_params = adapter.get_llm_invocation_params(self._context)

  # Send prompt start event, specifying tools.
  # Tools from context take priority over self._tools.
  tools = (
- self._context.tools
- if self._context.tools
- else self.get_llm_adapter().from_standard_tools(self._tools)
+ llm_connection_params["tools"]
+ if llm_connection_params["tools"]
+ else adapter.from_standard_tools(self._tools)
  )
  logger.debug(f"Using tools: {tools}")
  await self._send_prompt_start_event(tools)

  # Send system instruction.
  # Instruction from context takes priority over self._system_instruction.
- # (NOTE: this prioritizing occurred automatically behind the scenes: the context was
- # initialized with self._system_instruction and then updated itself from its messages when
- # get_messages_for_initializing_history() was called).
- logger.debug(f"Using system instruction: {history.system_instruction}")
- if history.system_instruction:
- await self._send_text_event(text=history.system_instruction, role=Role.SYSTEM)
+ system_instruction = (
+ llm_connection_params["system_instruction"]
+ if llm_connection_params["system_instruction"]
+ else self._system_instruction
+ )
+ logger.debug(f"Using system instruction: {system_instruction}")
+ if system_instruction:
+ await self._send_text_event(text=system_instruction, role=Role.SYSTEM)

  # Send conversation history
- for message in history.messages:
+ for message in llm_connection_params["messages"]:
+ # logger.debug(f"Seeding conversation history with message: {message}")
  await self._send_text_event(text=message.text, role=message.role)

  # Start audio input
@@ -492,9 +535,12 @@
  await self._send_session_end_events()
  self._client = None

+ # Clean up context
+ self._context = None
+
  # Clean up stream
  if self._stream:
- await self._stream.input_stream.close()
+ await self._stream.close()
  self._stream = None

  # NOTE: see explanation of HACK, below
@@ -510,15 +556,23 @@
  self._receive_task = None

  # Reset remaining connection-specific state
+ # Should be all private state except:
+ # - _wants_connection
+ # - _assistant_response_trigger_audio
  self._prompt_name = None
  self._input_audio_content_name = None
  self._content_being_received = None
  self._assistant_is_responding = False
+ self._may_need_repush_assistant_text = False
  self._ready_to_send_context = False
  self._handling_bot_stopped_speaking = False
  self._triggering_assistant_response = False
+ self._waiting_for_trigger_transcription = False
  self._disconnecting = False
  self._connected_time = None
+ self._user_text_buffer = ""
+ self._assistant_text_buffer = ""
+ self._completed_tool_calls = set()

  logger.info("Finished disconnecting")
  except Exception as e:
@@ -826,6 +880,10 @@
  # Handle the LLM completion ending
  await self._handle_completion_end_event(event_json)
  except Exception as e:
+ if self._disconnecting:
+ # Errors are kind of expected while disconnecting, so just
+ # ignore them and do nothing
+ return
  logger.error(f"{self} error processing responses: {e}")
  if self._wants_connection:
  await self.reset_conversation()
@@ -956,7 +1014,7 @@
  async def _report_assistant_response_started(self):
  logger.debug("Assistant response started")

- # Report that the assistant has started their response.
+ # Report the start of the assistant response.
  await self.push_frame(LLMFullResponseStartFrame())

  # Report that equivalent of TTS (this is a speech-to-speech model) started
@@ -968,23 +1026,16 @@

  logger.debug(f"Assistant response text added: {text}")

- # Report some text added to the ongoing assistant response
- await self.push_frame(LLMTextFrame(text))
-
- # Report some text added to the *equivalent* of TTS (this is a speech-to-speech model)
+ # Report the text of the assistant response.
  await self.push_frame(TTSTextFrame(text))

- # TODO: this is a (hopefully temporary) HACK. Here we directly manipulate the context rather
- # than relying on the frames pushed to the assistant context aggregator. The pattern of
- # receiving full-sentence text after the assistant has spoken does not easily fit with the
- # Pipecat expectation of chunks of text streaming in while the assistant is speaking.
- # Interruption handling was especially challenging. Rather than spend days trying to fit a
- # square peg in a round hole, I decided on this hack for the time being. We can most cleanly
- # abandon this hack if/when AWS Nova Sonic implements streaming smaller text chunks
- # interspersed with audio. Note that when we move away from this hack, we need to make sure
- # that on an interruption we avoid sending LLMFullResponseEndFrame, which gets the
- # LLMAssistantContextAggregator into a bad state.
- self._context.buffer_assistant_text(text)
+ # HACK: here we're also buffering the assistant text ourselves as a
+ # backup rather than relying solely on the assistant context aggregator
+ # to do it, because the text arrives from Nova Sonic only after all the
+ # assistant audio frames have been pushed, meaning that if an
+ # interruption frame were to arrive we would lose all of it (the text
+ # frames sitting in the queue would be wiped).
+ self._assistant_text_buffer += text

  async def _report_assistant_response_ended(self):
  if not self._context: # should never happen
@@ -992,14 +1043,34 @@
  return
  logger.debug("Assistant response ended")

- # Report that the assistant has finished their response.
+ # If an interruption frame arrived while the assistant was responding
+ # we may have lost all of the assistant text (see HACK, above), so
+ # re-push it downstream to the aggregator now.
+ if self._may_need_repush_assistant_text:
+ # Just in case, check that assistant text hasn't already made it
+ # into the context (sometimes it does, despite the interruption).
+ messages = self._context.get_messages()
+ last_message = messages[-1] if messages else None
+ if (
+ not last_message
+ or last_message.get("role") != "assistant"
+ or last_message.get("content") != self._assistant_text_buffer
+ ):
+ # We also need to re-push the LLMFullResponseStartFrame since the
+ # TTSTextFrame would be ignored otherwise (the interruption frame
+ # would have cleared the assistant aggregator state).
+ await self.push_frame(LLMFullResponseStartFrame())
+ await self.push_frame(TTSTextFrame(self._assistant_text_buffer))
+ self._may_need_repush_assistant_text = False
+
+ # Report the end of the assistant response.
  await self.push_frame(LLMFullResponseEndFrame())

  # Report that equivalent of TTS (this is a speech-to-speech model) stopped.
  await self.push_frame(TTSStoppedFrame())

- # For an explanation of this hack, see _report_assistant_response_text_added.
- self._context.flush_aggregated_assistant_text()
+ # Clear out the buffered assistant text
+ self._assistant_text_buffer = ""

  #
  # user transcription reporting
@@ -1016,33 +1087,67 @@

  logger.debug(f"User transcription text added: {text}")

- # Manually add new user transcription text to context.
- # We can't rely on the user context aggregator to do this since it's upstream from the LLM.
- self._context.buffer_user_text(text)
-
- # Report that some new user transcription text is available.
- if self._send_transcription_frames:
- await self.push_frame(
- InterimTranscriptionFrame(text=text, user_id="", timestamp=time_now_iso8601())
- )
+ # HACK: here we're buffering the user text ourselves rather than
+ # relying on the upstream user context aggregator to do it, because the
+ # text arrives in fairly large chunks spaced fairly far apart in time.
+ # That means the user text would be split between different messages in
+ # context. Even if we sent placeholder InterimTranscriptionFrames in
+ # between each TranscriptionFrame to tell the aggregator to hold off on
+ # finalizing the user message, the aggregator would likely get the last
+ # chunk too late.
+ self._user_text_buffer += f" {text}" if self._user_text_buffer else text

  async def _report_user_transcription_ended(self):
  if not self._context: # should never happen
  return

- # Manually add user transcription to context (if any has been buffered).
- # We can't rely on the user context aggregator to do this since it's upstream from the LLM.
- transcription = self._context.flush_aggregated_user_text()
-
- if not transcription:
- return
-
  logger.debug(f"User transcription ended")

- if self._send_transcription_frames:
- await self.push_frame(
- TranscriptionFrame(text=transcription, user_id="", timestamp=time_now_iso8601())
+ # Report to the upstream user context aggregator that some new user
+ # transcription text is available.
+
+ # HACK: Check if this transcription was triggered by our own
+ # assistant response trigger. If so, we need to wrap it with
+ # UserStarted/StoppedSpeakingFrames; otherwise the user aggregator
+ # would fire an EmulatedUserStartedSpeakingFrame, which would
+ # trigger an interruption, which would prevent us from writing the
+ # assistant response to context.
+ #
+ # Sending an EmulateUserStartedSpeakingFrame ourselves doesn't
+ # work: it just causes the interruption we're trying to avoid.
+ #
+ # Setting enable_emulated_vad_interruptions also doesn't work: at
+ # the time the user aggregator receives the TranscriptionFrame, it
+ # doesn't yet know the assistant has started responding, so it
+ # doesn't know that emulating the user starting to speak would
+ # cause an interruption.
+ should_wrap_in_user_started_stopped_speaking_frames = (
+ self._waiting_for_trigger_transcription
+ and self._user_text_buffer.strip().lower() == "ready"
+ )
+
+ # Start wrapping the upstream transcription in UserStarted/StoppedSpeakingFrames if needed
+ if should_wrap_in_user_started_stopped_speaking_frames:
+ logger.debug(
+ "Wrapping assistant response trigger transcription with upstream UserStarted/StoppedSpeakingFrames"
  )
+ await self.push_frame(UserStartedSpeakingFrame(), direction=FrameDirection.UPSTREAM)
+
+ # Send the transcription upstream for the user context aggregator
+ frame = TranscriptionFrame(
+ text=self._user_text_buffer, user_id="", timestamp=time_now_iso8601()
+ )
+ await self.push_frame(frame, direction=FrameDirection.UPSTREAM)
+
+ # Finish wrapping the upstream transcription in UserStarted/StoppedSpeakingFrames if needed
+ if should_wrap_in_user_started_stopped_speaking_frames:
+ await self.push_frame(UserStoppedSpeakingFrame(), direction=FrameDirection.UPSTREAM)
+
+ # Clear out the buffered user text
+ self._user_text_buffer = ""
+
+ # We're no longer waiting for a trigger transcription
+ self._waiting_for_trigger_transcription = False

  #
  # context
@@ -1054,23 +1159,26 @@
  *,
  user_params: LLMUserAggregatorParams = LLMUserAggregatorParams(),
  assistant_params: LLMAssistantAggregatorParams = LLMAssistantAggregatorParams(),
- ) -> AWSNovaSonicContextAggregatorPair:
+ ) -> LLMContextAggregatorPair:
  """Create context aggregator pair for managing conversation context.

+ NOTE: this method exists only for backward compatibility. New code
+ should instead do:
+ context = LLMContext(...)
+ context_aggregator = LLMContextAggregatorPair(context)
+
  Args:
- context: The OpenAI LLM context to upgrade.
+ context: The OpenAI LLM context.
  user_params: Parameters for the user context aggregator.
  assistant_params: Parameters for the assistant context aggregator.

  Returns:
  A pair of user and assistant context aggregators.
  """
- context.set_llm_adapter(self.get_llm_adapter())
-
- user = AWSNovaSonicUserContextAggregator(context=context, params=user_params)
- assistant = AWSNovaSonicAssistantContextAggregator(context=context, params=assistant_params)
-
- return AWSNovaSonicContextAggregatorPair(user, assistant)
+ context = LLMContext.from_openai_context(context)
+ return LLMContextAggregatorPair(
+ context, user_params=user_params, assistant_params=assistant_params
+ )

  #
  # assistant response trigger (HACK)
@@ -1108,6 +1216,8 @@
  try:
  logger.debug("Sending assistant response trigger...")

+ self._waiting_for_trigger_transcription = True
+
  chunk_duration = 0.02 # what we might get from InputAudioRawFrame
  chunk_size = int(
  chunk_duration
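
Note: the new create_context_aggregator() docstring above points new code at the universal context types instead of the Nova Sonic-specific aggregators. A minimal sketch of that wiring follows; the service constructor arguments and the initial messages list are illustrative assumptions, not taken from this diff.

    # Sketch of the pattern recommended by the docstring above. Constructor
    # arguments and the initial system message are illustrative only.
    from pipecat.processors.aggregators.llm_context import LLMContext
    from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
    from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService

    llm = AWSNovaSonicLLMService(
        secret_access_key="...",  # illustrative credential/region arguments
        access_key_id="...",
        region="us-east-1",
    )
    context = LLMContext(messages=[{"role": "system", "content": "You are a helpful assistant."}])
    context_aggregator = LLMContextAggregatorPair(context)

    # In the pipeline, the aggregators wrap the LLM as usual, e.g.:
    # [transport.input(), context_aggregator.user(), llm,
    #  transport.output(), context_aggregator.assistant()]
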
pipecat/services/aws/stt.py

@@ -286,6 +286,7 @@ class AWSTranscribeSTTService(STTService):

  logger.info(f"{self} Successfully connected to AWS Transcribe")

+ await self._call_event_handler("on_connected")
  except Exception as e:
  logger.error(f"{self} Failed to connect to AWS Transcribe: {e}")
  await self._disconnect()
@@ -310,6 +311,7 @@
  logger.warning(f"{self} Error closing WebSocket connection: {e}")
  finally:
  self._ws_client = None
+ await self._call_event_handler("on_disconnected")

  def language_to_service_language(self, language: Language) -> str | None:
  """Convert internal language enum to AWS Transcribe language code.
pipecat/services/aws_nova_sonic/context.py

@@ -8,18 +8,14 @@

  This module provides specialized context aggregators and message handling for AWS Nova Sonic,
  including conversation history management and role-specific message processing.
- """

- import warnings
+ .. deprecated:: 0.0.91
+ AWS Nova Sonic no longer uses types from this module under the hood.
+ It now uses `LLMContext` and `LLMContextAggregatorPair`.
+ Using the new patterns should allow you to not need types from this module.

- from pipecat.services.aws.nova_sonic.context import *
+ See deprecation warning in pipecat.services.aws.nova_sonic.context for more
+ details.
+ """

- with warnings.catch_warnings():
- warnings.simplefilter("always")
- warnings.warn(
- "Types in pipecat.services.aws_nova_sonic.context are deprecated. "
- "Please use the equivalent types from "
- "pipecat.services.aws.nova_sonic.context instead.",
- DeprecationWarning,
- stacklevel=2,
- )
+ from pipecat.services.aws.nova_sonic.context import *