dv-pipecat-ai 0.0.85.dev850__py3-none-any.whl → 0.0.85.dev851__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dv-pipecat-ai
3
- Version: 0.0.85.dev850
3
+ Version: 0.0.85.dev851
4
4
  Summary: An open source framework for voice (and multimodal) assistants
5
5
  License-Expression: BSD-2-Clause
6
6
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -1,4 +1,4 @@
1
- dv_pipecat_ai-0.0.85.dev850.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
1
+ dv_pipecat_ai-0.0.85.dev851.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
2
2
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
3
3
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -217,7 +217,7 @@ pipecat/services/deepgram/flux/stt.py,sha256=yCZodrHAOShgYy_GbdviX8iAuh36dBgDL41
217
217
  pipecat/services/deepseek/__init__.py,sha256=bU5z_oNGzgrF_YpsD9pYIMtEibeZFaUobbRjJ9WcYyE,259
218
218
  pipecat/services/deepseek/llm.py,sha256=5KjpU2blmhUTM3LcRE1ymdsk6OmoFkIzeQgyNOGwQh8,3112
219
219
  pipecat/services/elevenlabs/__init__.py,sha256=cMx5v0HEMh4WetMm5byR9tIjG6_wNVs9UxqWyB3tjlM,313
220
- pipecat/services/elevenlabs/stt.py,sha256=_RhBKpUYEGKMpcO7y4RLxmEOMK11LZFdZqDFIA-DZXk,27303
220
+ pipecat/services/elevenlabs/stt.py,sha256=ZOVDJo3cG-f3ZugBIdxR5jrxJFtbfmDAP8Ps_KLyOgs,30117
221
221
  pipecat/services/elevenlabs/tts.py,sha256=skUndgUatx2F5rjg2tBZLutB8k9B9Cjy-cUeglCDdwc,45314
222
222
  pipecat/services/fal/__init__.py,sha256=z_kfZETvUcKy68Lyvni4B-RtdkOvz3J3eh6sFDVKq6M,278
223
223
  pipecat/services/fal/image.py,sha256=vArKLKrIGoZfw_xeZY_E7zbUzfzVsScj-R7mOmVqjRQ,4585
@@ -416,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
416
416
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
417
417
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
418
418
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
419
- dv_pipecat_ai-0.0.85.dev850.dist-info/METADATA,sha256=rqzfsDkrkClO-BvwwJr5_b2ggADWXFKhgzPgToBwDm0,32955
420
- dv_pipecat_ai-0.0.85.dev850.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
421
- dv_pipecat_ai-0.0.85.dev850.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
422
- dv_pipecat_ai-0.0.85.dev850.dist-info/RECORD,,
419
+ dv_pipecat_ai-0.0.85.dev851.dist-info/METADATA,sha256=lmgj2aZSwfm8h9V1nljEVf_41rQpqAp-13HAtCuXiMw,32955
420
+ dv_pipecat_ai-0.0.85.dev851.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
421
+ dv_pipecat_ai-0.0.85.dev851.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
422
+ dv_pipecat_ai-0.0.85.dev851.dist-info/RECORD,,
@@ -159,10 +159,16 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]:
159
159
  result = BASE_LANGUAGES.get(language)
160
160
 
161
161
  # If not found in base languages, try to find the base language from a variant
162
+ # For example, Language.EN_US (value "en-US") -> Language("en") -> "eng"
162
163
  if not result:
163
164
  lang_str = str(language.value)
164
- base_code = lang_str.split("-")[0].lower()
165
- result = base_code if base_code in BASE_LANGUAGES.values() else None
165
+ base_code = lang_str.split("-")[0] # Get "en" from "en-US"
166
+ try:
167
+ base_language = Language(base_code)
168
+ result = BASE_LANGUAGES.get(base_language)
169
+ except (ValueError, KeyError):
170
+ # If base language not found in Language enum, return None
171
+ result = None
166
172
 
167
173
  return result
168
174
 
@@ -425,6 +431,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
425
431
  self._pending_final_task: Optional[asyncio.Task] = None
426
432
  self._timestamp_merge_delay_s = 0.25
427
433
  self._ttfb_started = False
434
+ self._waiting_for_timestamps = False
428
435
 
429
436
  @property
430
437
  def commit_strategy(self) -> str:
@@ -474,7 +481,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
474
481
  if isinstance(frame, UserStartedSpeakingFrame):
475
482
  if frame.emulated:
476
483
  return
477
- self._ttfb_started = False
484
+ # Start metrics and set flag to True so we can stop them later
485
+ await self.start_ttfb_metrics()
486
+ self._ttfb_started = True
478
487
  await self.start_processing_metrics()
479
488
  elif isinstance(frame, UserStoppedSpeakingFrame):
480
489
  if frame.emulated:
@@ -488,18 +497,28 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
488
497
  yield None
489
498
  return
490
499
 
491
- await self._ensure_connection()
500
+ if not await self._ensure_connection():
501
+ self.logger.error(f"{self} failed to establish connection, dropping audio")
502
+ yield None
503
+ return
504
+
492
505
  await self._send_audio_chunk(audio)
493
506
  yield None
494
507
 
495
- async def _ensure_connection(self):
508
+ async def _ensure_connection(self) -> bool:
509
+ """Ensure WebSocket connection is established and ready.
510
+
511
+ Returns:
512
+ bool: True if connection is ready, False otherwise.
513
+ """
496
514
  if not self._websocket or self._websocket.state is State.CLOSED:
497
515
  await self._connect()
516
+ return self._websocket is not None and self._websocket.state is State.OPEN
498
517
 
499
518
  async def _connect(self):
500
519
  await self._connect_websocket()
501
- if self._websocket and not self._receive_task:
502
- self._receive_task = asyncio.create_task(self._receive_task_handler(self._report_error))
520
+ if self._websocket and self._websocket.state is State.OPEN and not self._receive_task:
521
+ self._receive_task = self.create_task(self._receive_task_handler(self._report_error))
503
522
 
504
523
  async def _disconnect(self):
505
524
  if self._receive_task:
@@ -512,23 +531,30 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
512
531
  async def _connect_websocket(self):
513
532
  try:
514
533
  if self._websocket and self._websocket.state is State.OPEN:
534
+ self.logger.debug(f"{self} already connected, skipping reconnection")
515
535
  return
516
536
 
517
537
  ws_url = self._build_websocket_url()
518
538
  headers = {"xi-api-key": self._api_key}
519
- self.logger.debug(f"Connecting to ElevenLabs realtime STT at {ws_url}")
539
+ self.logger.info(f"{self} connecting to ElevenLabs realtime STT (WebSocket URL built)")
520
540
  self._websocket = await websocket_connect(ws_url, additional_headers=headers)
541
+ self.logger.info(f"{self} successfully connected to ElevenLabs realtime STT")
521
542
  await self._call_event_handler("on_connected")
522
543
  except Exception as e:
523
544
  self.logger.error(f"{self} unable to connect to ElevenLabs realtime STT: {e}")
524
545
  self._websocket = None
546
+ if self._receive_task:
547
+ await self.cancel_task(self._receive_task)
548
+ self._receive_task = None
549
+ # Push error to pipeline so callers know the connection failed
550
+ await self.push_error(ErrorFrame(f"ElevenLabs connection failed: {e}", fatal=False))
525
551
  await self._call_event_handler("on_connection_error", f"{e}")
526
552
 
527
553
  async def _disconnect_websocket(self):
528
554
  try:
529
555
  await self.stop_all_metrics()
530
556
  if self._websocket and self._websocket.state is State.OPEN:
531
- self.logger.debug("Disconnecting from ElevenLabs realtime STT")
557
+ self.logger.debug(f"{self} disconnecting from ElevenLabs realtime STT")
532
558
  await self._websocket.close()
533
559
  except Exception as e:
534
560
  self.logger.error(f"{self} error closing ElevenLabs realtime websocket: {e}")
@@ -573,6 +599,10 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
573
599
  }:
574
600
  fatal = message_type in {"auth_error", "quota_exceeded", "error"}
575
601
  description = data.get("error", data)
602
+ # Log full error details for debugging
603
+ self.logger.error(
604
+ f"{self} ElevenLabs error - Type: {message_type}, Fatal: {fatal}, Full data: {data}"
605
+ )
576
606
  await self.push_error(
577
607
  ErrorFrame(f"ElevenLabs realtime error: {description}", fatal=fatal)
578
608
  )
@@ -588,7 +618,11 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
588
618
  elevenlabs_language_code_to_language(data.get("language_code"))
589
619
  or self._language_override
590
620
  )
591
- await self.stop_ttfb_metrics()
621
+
622
+ # Only stop TTFB metrics on first partial
623
+ if self._ttfb_started:
624
+ await self.stop_ttfb_metrics()
625
+ self._ttfb_started = False
592
626
 
593
627
  await self.push_frame(
594
628
  InterimTranscriptionFrame(
@@ -604,8 +638,10 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
604
638
  if self._pending_final_message:
605
639
  await self._emit_transcription(self._pending_final_message)
606
640
  self._pending_final_message = None
641
+ self._waiting_for_timestamps = False
607
642
 
608
643
  self._pending_final_message = data
644
+ self._waiting_for_timestamps = True
609
645
  await self._schedule_pending_final_emit()
610
646
 
611
647
  async def _handle_committed_transcript_with_timestamps(self, data: Dict[str, Any]):
@@ -613,12 +649,16 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
613
649
  merged = {**self._pending_final_message, **data}
614
650
  await self._emit_transcription(merged)
615
651
  await self._clear_pending_final()
652
+ elif self._waiting_for_timestamps:
653
+ # Late arrival after timeout - don't emit duplicate
654
+ self.logger.warning(f"{self} timestamps arrived after timeout, skipping duplicate")
655
+ self._waiting_for_timestamps = False
616
656
  else:
617
657
  await self._emit_transcription(data)
618
658
 
619
659
  async def _schedule_pending_final_emit(self):
620
660
  await self._clear_pending_final(timer_only=True)
621
- self._pending_final_task = asyncio.create_task(self._emit_pending_after_delay())
661
+ self._pending_final_task = self.create_task(self._emit_pending_after_delay())
622
662
 
623
663
  async def _emit_pending_after_delay(self):
624
664
  try:
@@ -626,6 +666,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
626
666
  if self._pending_final_message:
627
667
  await self._emit_transcription(self._pending_final_message)
628
668
  self._pending_final_message = None
669
+ self._waiting_for_timestamps = False
629
670
  except asyncio.CancelledError:
630
671
  pass
631
672
  finally:
@@ -638,6 +679,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
638
679
 
639
680
  if not timer_only:
640
681
  self._pending_final_message = None
682
+ self._waiting_for_timestamps = False
641
683
 
642
684
  async def _emit_transcription(self, data: Dict[str, Any]):
643
685
  text = (data.get("text") or data.get("transcript") or "").strip()
@@ -648,7 +690,11 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
648
690
  elevenlabs_language_code_to_language(data.get("language_code"))
649
691
  or self._language_override
650
692
  )
651
- await self.stop_ttfb_metrics()
693
+
694
+ # TTFB should already be stopped by partial, but guard just in case
695
+ if self._ttfb_started:
696
+ await self.stop_ttfb_metrics()
697
+ self._ttfb_started = False
652
698
 
653
699
  frame = TranscriptionFrame(
654
700
  text,
@@ -666,28 +712,39 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
666
712
  if not audio or not self._websocket:
667
713
  return
668
714
 
669
- if not self._ttfb_started:
670
- await self.start_ttfb_metrics()
671
- self._ttfb_started = True
672
-
673
- payload = {
674
- "message_type": "input_audio_chunk",
675
- "audio_base_64": base64.b64encode(audio).decode("ascii"),
676
- "commit": False,
677
- "sample_rate": self.sample_rate,
678
- }
679
- await self._websocket.send(json.dumps(payload))
715
+ try:
716
+ payload = {
717
+ "message_type": "input_audio_chunk",
718
+ "audio_base_64": base64.b64encode(audio).decode("ascii"),
719
+ "commit": False,
720
+ "sample_rate": self.sample_rate,
721
+ }
722
+ await self._websocket.send(json.dumps(payload))
723
+ except Exception as e:
724
+ self.logger.error(f"{self} error sending audio chunk: {e}")
725
+ await self.push_error(ErrorFrame(f"Failed to send audio: {e}"))
726
+ # Trigger reconnection
727
+ await self._disconnect()
728
+ await self._connect()
680
729
 
681
730
  async def _send_commit(self):
682
731
  if not self._websocket:
683
732
  return
684
- payload = {
685
- "message_type": "input_audio_chunk",
686
- "audio_base_64": "",
687
- "commit": True,
688
- "sample_rate": self.sample_rate,
689
- }
690
- await self._websocket.send(json.dumps(payload))
733
+
734
+ try:
735
+ payload = {
736
+ "message_type": "input_audio_chunk",
737
+ "audio_base_64": "",
738
+ "commit": True,
739
+ "sample_rate": self.sample_rate,
740
+ }
741
+ await self._websocket.send(json.dumps(payload))
742
+ except Exception as e:
743
+ self.logger.error(f"{self} error sending commit: {e}")
744
+ await self.push_error(ErrorFrame(f"Failed to send commit: {e}"))
745
+ # Trigger reconnection
746
+ await self._disconnect()
747
+ await self._connect()
691
748
 
692
749
  def _build_websocket_url(self) -> str:
693
750
  if not self.sample_rate: