dv-pipecat-ai 0.0.85.dev820__py3-none-any.whl → 0.0.85.dev822__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai has been flagged as potentially problematic. See the registry's advisory page for this release for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dv-pipecat-ai
3
- Version: 0.0.85.dev820
3
+ Version: 0.0.85.dev822
4
4
  Summary: An open source framework for voice (and multimodal) assistants
5
5
  License-Expression: BSD-2-Clause
6
6
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -1,4 +1,4 @@
1
- dv_pipecat_ai-0.0.85.dev820.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
1
+ dv_pipecat_ai-0.0.85.dev822.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
2
2
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
3
3
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -79,7 +79,7 @@ pipecat/extensions/voicemail/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
79
79
  pipecat/extensions/voicemail/voicemail_detector.py,sha256=JxmU2752iWP_1_GmzZReNESUTFAeyEa4XBPL20_C208,30004
80
80
  pipecat/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
81
  pipecat/frames/frames.proto,sha256=JXZm3VXLR8zMOUcOuhVoe2mhM3MQIQGMJXLopdJO_5Y,839
82
- pipecat/frames/frames.py,sha256=_GbvjOe1HRDSVCTqF5nvRaA-oCFrtyWfl457Uq0qkGw,49229
82
+ pipecat/frames/frames.py,sha256=CxlrFst5DuD6kDp2CE6kWigVezF94y-Snf6h8w1pwVU,49522
83
83
  pipecat/frames/protobufs/frames_pb2.py,sha256=VHgGV_W7qQ4sfQK6RHb5_DggLm3PiSYMr6aBZ8_p1cQ,2590
84
84
  pipecat/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  pipecat/metrics/metrics.py,sha256=bdZNciEtLTtA-xgoKDz2RJAy6fKrXkTwz3pryVHzc2M,2713
@@ -113,7 +113,7 @@ pipecat/processors/idle_frame_processor.py,sha256=z8AuhGap61lA5K35P6XCaOpn4kkmK_
113
113
  pipecat/processors/logger.py,sha256=8xa4KKekXQIETlQR7zoGnwUpLNo8CeDVm7YjyXePN-w,2385
114
114
  pipecat/processors/producer_processor.py,sha256=iIIOHZd77APvUGP7JqFbznAHUnCULcq_qYiSEjwXHcc,3265
115
115
  pipecat/processors/text_transformer.py,sha256=LnfWJYzntJhZhrQ1lgSSY4D4VbHtrQJgrC227M69ZYU,1718
116
- pipecat/processors/transcript_processor.py,sha256=9F00tY3cxt63ZhYvFGSSAnuUTt3J16mEOUHMzIMndMY,11720
116
+ pipecat/processors/transcript_processor.py,sha256=fr5JtlTOfmKnfmYG8ZwRj4DpZWP-uuGi6aNNKtlLxRg,12491
117
117
  pipecat/processors/two_stage_user_idle_processor.py,sha256=uf2aZh_lfW-eMxmFogP3R4taAJ1yXOSqjKsR7oXtD0Y,2938
118
118
  pipecat/processors/user_idle_processor.py,sha256=Dl-Kcg0B4JZqWXXiyGuvYszGimbu2oKOyOJC92R9_hE,9140
119
119
  pipecat/processors/aggregators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -122,7 +122,7 @@ pipecat/processors/aggregators/gated.py,sha256=tii0sRrBkRW6y9Xq5iTWPnqlOEejU4VqP
122
122
  pipecat/processors/aggregators/gated_llm_context.py,sha256=CPv6sMA8irD1zZ3fU1gSv6D7qcPvCA0MdpFhBtJ_ekI,3007
123
123
  pipecat/processors/aggregators/gated_open_ai_llm_context.py,sha256=DgqmdPj1u3fP_SVmxtfP7NjHqnyhN_RVVTDfmjbkxAs,361
124
124
  pipecat/processors/aggregators/llm_context.py,sha256=wNbZA0Vt0FzNc5cu06xiv1z7DIClIlfqR1ZD8EusbVw,11085
125
- pipecat/processors/aggregators/llm_response.py,sha256=igjIcBwzXzULWQIzM6XxXlXCHbR4Q5tAHP8PBHaPVNQ,47314
125
+ pipecat/processors/aggregators/llm_response.py,sha256=cBNGU8Ld4zT36-QsE1EJemrNA12q7lc9i-vLM9qmLcQ,48075
126
126
  pipecat/processors/aggregators/llm_response_universal.py,sha256=5PqmpATpekD8BVWyBExZgatKHsNbZem8M-A7_VwTbiQ,34334
127
127
  pipecat/processors/aggregators/openai_llm_context.py,sha256=cC8DXdVPERRN04i0i-1Ys6kusvnbMALeH-Z8Pu5K684,12999
128
128
  pipecat/processors/aggregators/sentence.py,sha256=E7e3knfQl6HEGpYMKPklF1aO_gOn-rr7SnynErwfkQk,2235
@@ -353,7 +353,7 @@ pipecat/transcriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
353
353
  pipecat/transcriptions/language.py,sha256=-mWI1MiZbasuoqZTOBH69dAmoM7-UJzWq9rSCcrnmh4,8228
354
354
  pipecat/transports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
355
355
  pipecat/transports/base_input.py,sha256=BDex3CfCefwXyjK0M_FBXYEjqXYKUKR5ODtxcRp66uI,20086
356
- pipecat/transports/base_output.py,sha256=1Ho9sxI80B1HaooLG1uqF_8ALLXjtyidsyJtQEd2C24,35214
356
+ pipecat/transports/base_output.py,sha256=mNlIOo7tETlbYPbDyOtA2H-TkBGFKmjuCMDzQUtiwmk,35423
357
357
  pipecat/transports/base_transport.py,sha256=JlNiH0DysTfr6azwHauJqY_Z9HJC702O29Q0qrsLrg4,7530
358
358
  pipecat/transports/daily/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
359
359
  pipecat/transports/daily/transport.py,sha256=VanO33ff9g6px-vwGgT6M7cMVg786pOGfMU7Okm7a78,91917
@@ -415,7 +415,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
415
415
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
416
416
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
417
417
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
418
- dv_pipecat_ai-0.0.85.dev820.dist-info/METADATA,sha256=sMGSGf41v4Y2mATJi9LrojFUXNojnruqIaMHHXstmmM,32924
419
- dv_pipecat_ai-0.0.85.dev820.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
420
- dv_pipecat_ai-0.0.85.dev820.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
421
- dv_pipecat_ai-0.0.85.dev820.dist-info/RECORD,,
418
+ dv_pipecat_ai-0.0.85.dev822.dist-info/METADATA,sha256=32ww2Lem8OVrVN8fSto1BPjfW5dXjPi4fS9Me8Zz-YE,32924
419
+ dv_pipecat_ai-0.0.85.dev822.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
420
+ dv_pipecat_ai-0.0.85.dev822.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
421
+ dv_pipecat_ai-0.0.85.dev822.dist-info/RECORD,,
pipecat/frames/frames.py CHANGED
@@ -457,6 +457,7 @@ class TranscriptionMessage:
457
457
  content: str
458
458
  user_id: Optional[str] = None
459
459
  timestamp: Optional[str] = None
460
+ message_id: Optional[int] = None
460
461
 
461
462
 
462
463
  @dataclass
@@ -510,6 +511,17 @@ class TranscriptionUpdateFrame(DataFrame):
510
511
  return f"{self.name}(pts: {pts}, messages: {len(self.messages)})"
511
512
 
512
513
 
514
+ @dataclass
515
+ class TranscriptDropFrame(DataFrame):
516
+ """Frame indicating previously emitted transcript chunks should be discarded.
517
+
518
+ Parameters:
519
+ transcript_ids: List of frame/message identifiers to drop.
520
+ """
521
+
522
+ transcript_ids: List[int]
523
+
524
+
513
525
  @dataclass
514
526
  class LLMContextFrame(Frame):
515
527
  """Frame containing a universal LLM context.
@@ -48,9 +48,10 @@ from pipecat.frames.frames import (
48
48
  LLMTextFrame,
49
49
  OpenAILLMContextAssistantTimestampFrame,
50
50
  SpeechControlParamsFrame,
51
- StartInterruptionFrame,
52
51
  StartFrame,
52
+ StartInterruptionFrame,
53
53
  TextFrame,
54
+ TranscriptDropFrame,
54
55
  TranscriptionFrame,
55
56
  UserImageRawFrame,
56
57
  UserStartedSpeakingFrame,
@@ -446,6 +447,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
446
447
  self._latest_final_transcript = ""
447
448
  self._last_user_speaking_time = 0
448
449
  self._last_aggregation_push_time = 0
450
+ self._pending_transcription_ids: List[int] = []
449
451
 
450
452
  async def reset(self):
451
453
  """Reset the aggregation state and interruption strategies."""
@@ -453,6 +455,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
453
455
  self._was_bot_speaking = False
454
456
  self._seen_interim_results = False
455
457
  self._waiting_for_aggregation = False
458
+ self._pending_transcription_ids.clear()
456
459
  [await s.reset() for s in self._interruption_strategies]
457
460
 
458
461
  async def handle_aggregation(self, aggregation: str):
@@ -588,6 +591,17 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
588
591
 
589
592
  return any([await should_interrupt(s) for s in self._interruption_strategies])
590
593
 
594
+ async def _discard_pending_transcriptions(self, reason: str):
595
+ """Notify upstream processors that pending transcripts should be dropped."""
596
+ if self._pending_transcription_ids:
597
+ drop_frame = TranscriptDropFrame(transcript_ids=list(self._pending_transcription_ids))
598
+ self.logger.debug(
599
+ f"Dropping {len(self._pending_transcription_ids)} transcript chunk(s) due to {reason}"
600
+ )
601
+ await self.push_frame(drop_frame, FrameDirection.UPSTREAM)
602
+ self._pending_transcription_ids.clear()
603
+ self._aggregation = ""
604
+
591
605
  async def _start(self, frame: StartFrame):
592
606
  self._create_aggregation_task()
593
607
 
@@ -616,8 +630,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
616
630
 
617
631
  async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame):
618
632
  if len(self._aggregation) > 0:
619
- self.logger.debug(f"Dropping {self._aggregation}")
620
- self._aggregation = ""
633
+ await self._discard_pending_transcriptions("user_started_speaking")
621
634
  self._latest_final_transcript = ""
622
635
  self._last_user_speaking_time = time.time()
623
636
  self._user_speaking = True
@@ -662,6 +675,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
662
675
  return
663
676
 
664
677
  self._aggregation += f" {text}" if self._aggregation else text
678
+ self._pending_transcription_ids.append(frame.id)
665
679
  # We just got a final result, so let's reset interim results.
666
680
  self._seen_interim_results = False
667
681
 
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
20
20
  EndFrame,
21
21
  Frame,
22
22
  InterruptionFrame,
23
+ TranscriptDropFrame,
23
24
  TranscriptionFrame,
24
25
  TranscriptionMessage,
25
26
  TranscriptionUpdateFrame,
@@ -44,6 +45,7 @@ class BaseTranscriptProcessor(FrameProcessor):
44
45
  super().__init__(**kwargs)
45
46
  self._processed_messages: List[TranscriptionMessage] = []
46
47
  self._register_event_handler("on_transcript_update")
48
+ self._register_event_handler("on_transcript_drop")
47
49
 
48
50
  async def _emit_update(self, messages: List[TranscriptionMessage]):
49
51
  """Emit transcript updates for new messages.
@@ -57,6 +59,18 @@ class BaseTranscriptProcessor(FrameProcessor):
57
59
  await self._call_event_handler("on_transcript_update", update_frame)
58
60
  await self.push_frame(update_frame)
59
61
 
62
+ async def _handle_transcript_drop(self, frame: TranscriptDropFrame):
63
+ """Handle transcript drop notifications by removing stored messages."""
64
+ if not frame.transcript_ids:
65
+ return
66
+
67
+ drop_ids = set(frame.transcript_ids)
68
+ if drop_ids:
69
+ self._processed_messages = [
70
+ msg for msg in self._processed_messages if msg.message_id not in drop_ids
71
+ ]
72
+ await self._call_event_handler("on_transcript_drop", frame)
73
+
60
74
 
61
75
  class UserTranscriptProcessor(BaseTranscriptProcessor):
62
76
  """Processes user transcription frames into timestamped conversation messages."""
@@ -72,9 +86,15 @@ class UserTranscriptProcessor(BaseTranscriptProcessor):
72
86
 
73
87
  if isinstance(frame, TranscriptionFrame):
74
88
  message = TranscriptionMessage(
75
- role="user", user_id=frame.user_id, content=frame.text, timestamp=frame.timestamp
89
+ role="user",
90
+ user_id=frame.user_id,
91
+ content=frame.text,
92
+ timestamp=frame.timestamp,
93
+ message_id=frame.id,
76
94
  )
77
95
  await self._emit_update([message])
96
+ elif isinstance(frame, TranscriptDropFrame):
97
+ await self._handle_transcript_drop(frame)
78
98
 
79
99
  await self.push_frame(frame, direction)
80
100
 
@@ -84,6 +84,7 @@ class BaseOutputTransport(FrameProcessor):
84
84
  # us to send multiple streams at the same time if the transport allows
85
85
  # it.
86
86
  self._media_senders: Dict[Any, "BaseOutputTransport.MediaSender"] = {}
87
+ self._register_event_handler("on_output_terminated")
87
88
 
88
89
  @property
89
90
  def sample_rate(self) -> int:
@@ -301,10 +302,12 @@ class BaseOutputTransport(FrameProcessor):
301
302
  await self.start(frame)
302
303
  elif isinstance(frame, EndFrame):
303
304
  await self.stop(frame)
305
+ await self._call_event_handler("on_output_terminated", frame)
304
306
  # Keep pushing EndFrame down so all the pipeline stops nicely.
305
307
  await self.push_frame(frame, direction)
306
308
  elif isinstance(frame, CancelFrame):
307
309
  await self.cancel(frame)
310
+ await self._call_event_handler("on_output_terminated", frame)
308
311
  await self.push_frame(frame, direction)
309
312
  elif isinstance(frame, InterruptionFrame):
310
313
  await self.push_frame(frame, direction)