dv-pipecat-ai 0.0.85.dev11__py3-none-any.whl → 0.0.85.dev12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

{dv_pipecat_ai-0.0.85.dev11.dist-info → dv_pipecat_ai-0.0.85.dev12.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dv-pipecat-ai
3
- Version: 0.0.85.dev11
3
+ Version: 0.0.85.dev12
4
4
  Summary: An open source framework for voice (and multimodal) assistants
5
5
  License-Expression: BSD-2-Clause
6
6
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
{dv_pipecat_ai-0.0.85.dev11.dist-info → dv_pipecat_ai-0.0.85.dev12.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- dv_pipecat_ai-0.0.85.dev11.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
1
+ dv_pipecat_ai-0.0.85.dev12.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
2
2
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
3
3
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -77,7 +77,7 @@ pipecat/extensions/voicemail/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
77
77
  pipecat/extensions/voicemail/voicemail_detector.py,sha256=g3L1m3cPJzsadeB5a8WRC9klH0D8m7xfPgB2YEaL6Do,29983
78
78
  pipecat/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
79
79
  pipecat/frames/frames.proto,sha256=JXZm3VXLR8zMOUcOuhVoe2mhM3MQIQGMJXLopdJO_5Y,839
80
- pipecat/frames/frames.py,sha256=oqoo7p-uJOqak50mxhCGq7S0TusM0I4qp3QAftKHQnw,45428
80
+ pipecat/frames/frames.py,sha256=2aXsBpZB6dU7I1PZRFh8RqALDktxxoRgZPa1pf_cdCM,45804
81
81
  pipecat/frames/protobufs/frames_pb2.py,sha256=VHgGV_W7qQ4sfQK6RHb5_DggLm3PiSYMr6aBZ8_p1cQ,2590
82
82
  pipecat/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
83
  pipecat/metrics/metrics.py,sha256=bdZNciEtLTtA-xgoKDz2RJAy6fKrXkTwz3pryVHzc2M,2713
@@ -111,7 +111,7 @@ pipecat/processors/idle_frame_processor.py,sha256=z8AuhGap61lA5K35P6XCaOpn4kkmK_
111
111
  pipecat/processors/logger.py,sha256=VGNwxQSc_F0rS3KBmfqas7f5aFyRQKfeljozOxfGXk4,2393
112
112
  pipecat/processors/producer_processor.py,sha256=iIIOHZd77APvUGP7JqFbznAHUnCULcq_qYiSEjwXHcc,3265
113
113
  pipecat/processors/text_transformer.py,sha256=LnfWJYzntJhZhrQ1lgSSY4D4VbHtrQJgrC227M69ZYU,1718
114
- pipecat/processors/transcript_processor.py,sha256=CG9yej6WOiy_HhagNXjxkISHkHii0JDfK_V6opseC2E,11740
114
+ pipecat/processors/transcript_processor.py,sha256=SDbqFLzasptZWqeiA6BESJ4hky-Uh-ZUNYBZR0q0Dnw,12508
115
115
  pipecat/processors/two_stage_user_idle_processor.py,sha256=uf2aZh_lfW-eMxmFogP3R4taAJ1yXOSqjKsR7oXtD0Y,2938
116
116
  pipecat/processors/user_idle_processor.py,sha256=PQItBx5bL1y_lFTtHf0zgLubjCXv60jU1jrVtm-K4bg,9268
117
117
  pipecat/processors/aggregators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -119,7 +119,7 @@ pipecat/processors/aggregators/dtmf_aggregator.py,sha256=nngjLiaOtcZtuCNpYPyfUVL
119
119
  pipecat/processors/aggregators/gated.py,sha256=tii0sRrBkRW6y9Xq5iTWPnqlOEejU4VqPIPtdOa61pc,3073
120
120
  pipecat/processors/aggregators/gated_openai_llm_context.py,sha256=cr6MT8J6SpPzZbppKPOKe3_pt_5qXC9g6a4wvZDyrec,3005
121
121
  pipecat/processors/aggregators/llm_context.py,sha256=eDf1cQElcISLx3onaA9LCWuepzb2G_JGszLzpNXggXo,9723
122
- pipecat/processors/aggregators/llm_response.py,sha256=0StzYtq7EzlAFSWp10I0yY0pV1jysw1ySEWv5R50h_s,47360
122
+ pipecat/processors/aggregators/llm_response.py,sha256=W0bqc5IZTE1cIB8egBefUOaPZiLklzge5npFWvAbCcw,48248
123
123
  pipecat/processors/aggregators/llm_response_universal.py,sha256=fBnB3rZVdxj4iEKIWcnR7yTpqyKupbcg7IUv6XVxrDQ,34287
124
124
  pipecat/processors/aggregators/openai_llm_context.py,sha256=cC8DXdVPERRN04i0i-1Ys6kusvnbMALeH-Z8Pu5K684,12999
125
125
  pipecat/processors/aggregators/sentence.py,sha256=E7e3knfQl6HEGpYMKPklF1aO_gOn-rr7SnynErwfkQk,2235
@@ -384,7 +384,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=HwDCqLGijhYD3F8nxDuQmEw-YkRw0
384
384
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
385
385
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
386
386
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
387
- dv_pipecat_ai-0.0.85.dev11.dist-info/METADATA,sha256=_scIy5gP8k7GUtLAA9NzNVT_T1y__8ROU0gPj1G6FCw,32858
388
- dv_pipecat_ai-0.0.85.dev11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
- dv_pipecat_ai-0.0.85.dev11.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
390
- dv_pipecat_ai-0.0.85.dev11.dist-info/RECORD,,
387
+ dv_pipecat_ai-0.0.85.dev12.dist-info/METADATA,sha256=pj9DcBZS3A1SFUXm5aZDJS5K9e4YldFoPBcrrBiFxPI,32858
388
+ dv_pipecat_ai-0.0.85.dev12.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
389
+ dv_pipecat_ai-0.0.85.dev12.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
390
+ dv_pipecat_ai-0.0.85.dev12.dist-info/RECORD,,
pipecat/frames/frames.py CHANGED
@@ -451,12 +451,14 @@ class TranscriptionMessage:
451
451
  content: The message content/text.
452
452
  user_id: Optional identifier for the user.
453
453
  timestamp: Optional timestamp when the message was created.
454
+ message_id: Optional unique identifier for tracking and dropping messages.
454
455
  """
455
456
 
456
457
  role: Literal["user", "assistant"]
457
458
  content: str
458
459
  user_id: Optional[str] = None
459
460
  timestamp: Optional[str] = None
461
+ message_id: Optional[int] = None
460
462
 
461
463
 
462
464
  @dataclass
@@ -510,6 +512,17 @@ class TranscriptionUpdateFrame(DataFrame):
510
512
  return f"{self.name}(pts: {pts}, messages: {len(self.messages)})"
511
513
 
512
514
 
515
+ @dataclass
516
+ class TranscriptDropFrame(DataFrame):
517
+ """Frame indicating previously emitted transcript chunks should be discarded.
518
+
519
+ Parameters:
520
+ transcript_ids: List of frame/message identifiers to drop.
521
+ """
522
+
523
+ transcript_ids: List[int]
524
+
525
+
513
526
  @dataclass
514
527
  class LLMContextFrame(Frame):
515
528
  """Frame containing a universal LLM context.
pipecat/processors/aggregators/llm_response.py CHANGED
@@ -51,6 +51,7 @@ from pipecat.frames.frames import (
51
51
  StartFrame,
52
52
  StartInterruptionFrame,
53
53
  TextFrame,
54
+ TranscriptDropFrame,
54
55
  TranscriptionFrame,
55
56
  UserImageRawFrame,
56
57
  UserStartedSpeakingFrame,
@@ -446,6 +447,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
446
447
  self._latest_final_transcript = ""
447
448
  self._last_user_speaking_time = 0
448
449
  self._last_aggregation_push_time = 0
450
+ self._pending_transcription_ids: List[int] = []
449
451
 
450
452
  async def reset(self):
451
453
  """Reset the aggregation state and interruption strategies."""
@@ -453,6 +455,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
453
455
  self._was_bot_speaking = False
454
456
  self._seen_interim_results = False
455
457
  self._waiting_for_aggregation = False
458
+ self._pending_transcription_ids.clear()
456
459
  [await s.reset() for s in self._interruption_strategies]
457
460
 
458
461
  async def handle_aggregation(self, aggregation: str):
@@ -548,7 +551,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
548
551
  await self._process_aggregation()
549
552
  else:
550
553
  self.logger.debug("Interruption conditions not met - not pushing aggregation")
551
- # Don't process aggregation, just reset it
554
+ # Don't process aggregation, discard pending transcriptions and reset
555
+ await self._discard_pending_transcriptions("interruption_conditions_not_met")
552
556
  await self.reset()
553
557
  else:
554
558
  if trigger_interruption:
@@ -614,10 +618,18 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
614
618
  for s in self.interruption_strategies:
615
619
  await s.append_audio(frame.audio, frame.sample_rate)
616
620
 
621
+ async def _discard_pending_transcriptions(self, reason: str):
622
+ """Notify upstream processors that pending transcripts should be dropped."""
623
+ if self._pending_transcription_ids:
624
+ drop_frame = TranscriptDropFrame(transcript_ids=list(self._pending_transcription_ids))
625
+ await self.push_frame(drop_frame, FrameDirection.UPSTREAM)
626
+ self._pending_transcription_ids.clear()
627
+
617
628
  async def _handle_user_started_speaking(self, frame: UserStartedSpeakingFrame):
618
629
  if len(self._aggregation) > 0:
619
630
  self.logger.debug(f"Dropping {self._aggregation}")
620
631
  self._aggregation = ""
632
+ await self._discard_pending_transcriptions("user_started_speaking")
621
633
  self._latest_final_transcript = ""
622
634
  self._last_user_speaking_time = time.time()
623
635
  self._user_speaking = True
@@ -662,6 +674,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
662
674
  return
663
675
 
664
676
  self._aggregation += f" {text}" if self._aggregation else text
677
+ self._pending_transcription_ids.append(frame.id)
665
678
  # We just got a final result, so let's reset interim results.
666
679
  self._seen_interim_results = False
667
680
 
@@ -791,6 +804,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
791
804
  if self._bot_speaking and not self._params.enable_emulated_vad_interruptions:
792
805
  # If emulated VAD interruptions are disabled and bot is speaking, ignore
793
806
  logger.debug("Ignoring user speaking emulation, bot is speaking.")
807
+ await self._discard_pending_transcriptions("emulated_vad_ignored")
794
808
  await self.reset()
795
809
  else:
796
810
  # Either bot is not speaking, or emulated VAD interruptions are enabled
pipecat/processors/transcript_processor.py CHANGED
@@ -20,6 +20,7 @@ from pipecat.frames.frames import (
20
20
  EndFrame,
21
21
  Frame,
22
22
  StartInterruptionFrame,
23
+ TranscriptDropFrame,
23
24
  TranscriptionFrame,
24
25
  TranscriptionMessage,
25
26
  TranscriptionUpdateFrame,
@@ -44,6 +45,7 @@ class BaseTranscriptProcessor(FrameProcessor):
44
45
  super().__init__(**kwargs)
45
46
  self._processed_messages: List[TranscriptionMessage] = []
46
47
  self._register_event_handler("on_transcript_update")
48
+ self._register_event_handler("on_transcript_drop")
47
49
 
48
50
  async def _emit_update(self, messages: List[TranscriptionMessage]):
49
51
  """Emit transcript updates for new messages.
@@ -57,6 +59,19 @@ class BaseTranscriptProcessor(FrameProcessor):
57
59
  await self._call_event_handler("on_transcript_update", update_frame)
58
60
  await self.push_frame(update_frame)
59
61
 
62
+ async def _handle_transcript_drop(self, frame: TranscriptDropFrame):
63
+ """Handle transcript drop notifications by removing stored messages."""
64
+ if not frame.transcript_ids:
65
+ return
66
+
67
+ await self._call_event_handler("on_transcript_drop", frame)
68
+
69
+ drop_ids = set(frame.transcript_ids)
70
+ if drop_ids:
71
+ self._processed_messages = [
72
+ msg for msg in self._processed_messages if msg.message_id not in drop_ids
73
+ ]
74
+
60
75
 
61
76
  class UserTranscriptProcessor(BaseTranscriptProcessor):
62
77
  """Processes user transcription frames into timestamped conversation messages."""
@@ -72,9 +87,15 @@ class UserTranscriptProcessor(BaseTranscriptProcessor):
72
87
 
73
88
  if isinstance(frame, TranscriptionFrame):
74
89
  message = TranscriptionMessage(
75
- role="user", user_id=frame.user_id, content=frame.text, timestamp=frame.timestamp
90
+ role="user",
91
+ user_id=frame.user_id,
92
+ content=frame.text,
93
+ timestamp=frame.timestamp,
94
+ message_id=frame.id,
76
95
  )
77
96
  await self._emit_update([message])
97
+ elif isinstance(frame, TranscriptDropFrame):
98
+ await self._handle_transcript_drop(frame)
78
99
 
79
100
  await self.push_frame(frame, direction)
80
101