PyPI - livekit-plugins-aws - Versions diffs - 1.2.5__tar.gz → 1.2.7__tar.gz - Mend

livekit-plugins-aws 1.2.5.tar.gz → 1.2.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-aws might be problematic. Click here for more details.

Files changed (18) hide show

{livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-aws
-Version: 1.2.5
+Version: 1.2.7
 Summary: LiveKit Agents Plugin for services from AWS
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -19,8 +19,8 @@ Classifier: Topic :: Multimedia :: Video
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: aioboto3>=14.1.0
-Requires-Dist: amazon-transcribe>=0.6.2
-Requires-Dist: livekit-agents>=1.2.5
+Requires-Dist: amazon-transcribe>=0.6.4
+Requires-Dist: livekit-agents>=1.2.7
 Provides-Extra: realtime
 Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
 Requires-Dist: boto3>1.35.10; extra == 'realtime'

{livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/models.py RENAMED Viewed

@@ -46,3 +46,4 @@ TTSLanguages = Literal[
 ]
 TTSEncoding = Literal["mp3"]
+TTSTextType = Literal["text", "ssml"]

{livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/stt.py RENAMED Viewed

@@ -192,7 +192,7 @@ class SpeechStream(stt.SpeechStream):
                     self._event_ch.send_nowait(
                         stt.SpeechEvent(
                             type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
-                            alternatives=[_streaming_recognize_response_to_speech_data(resp)],
+                            alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
                         )
                     )
@@ -200,20 +200,22 @@ class SpeechStream(stt.SpeechStream):
                     self._event_ch.send_nowait(
                         stt.SpeechEvent(
                             type=stt.SpeechEventType.FINAL_TRANSCRIPT,
-                            alternatives=[_streaming_recognize_response_to_speech_data(resp)],
+                            alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
                         )
                     )
             if not resp.is_partial:
                 self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
-def _streaming_recognize_response_to_speech_data(resp: Result) -> stt.SpeechData:
-    data = stt.SpeechData(
-        language="en-US",
-        start_time=resp.start_time if resp.start_time else 0.0,
-        end_time=resp.end_time if resp.end_time else 0.0,
-        text=resp.alternatives[0].transcript if resp.alternatives else "",
-    )
-    return data
+    def _streaming_recognize_response_to_speech_data(self, resp: Result) -> stt.SpeechData:
+        confidence = 0.0
+        if resp.alternatives and (items := resp.alternatives[0].items):
+            confidence = items[0].confidence or 0.0
+        return stt.SpeechData(
+            language=resp.language_code or self._opts.language,
+            start_time=resp.start_time if resp.start_time is not None else 0.0,
+            end_time=resp.end_time if resp.end_time is not None else 0.0,
+            text=resp.alternatives[0].transcript if resp.alternatives else "",
+            confidence=confidence,
+        )

{livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/tts.py RENAMED Viewed

@@ -32,11 +32,12 @@ from livekit.agents.types import (
 )
 from livekit.agents.utils import is_given
-from .models import TTSLanguages, TTSSpeechEngine
+from .models import TTSLanguages, TTSSpeechEngine, TTSTextType
 from .utils import _strip_nones
 DEFAULT_SPEECH_ENGINE: TTSSpeechEngine = "generative"
 DEFAULT_VOICE = "Ruth"
+DEFAULT_TEXT_TYPE: TTSTextType = "text"
 @dataclass
@@ -47,6 +48,7 @@ class _TTSOptions:
     region: str | None
     sample_rate: int
     language: TTSLanguages | str | None
+    text_type: TTSTextType
 class TTS(tts.TTS):
@@ -56,6 +58,7 @@ class TTS(tts.TTS):
         voice: str = "Ruth",
         language: NotGivenOr[TTSLanguages | str] = NOT_GIVEN,
         speech_engine: TTSSpeechEngine = "generative",
+        text_type: TTSTextType = "text",
         sample_rate: int = 16000,
         region: str | None = None,
         api_key: str | None = None,
@@ -71,10 +74,11 @@ class TTS(tts.TTS):
         See https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html for more details on the the AWS Polly TTS.
         Args:
-            Voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
-            language (TTS_LANGUAGE, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
+            voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
+            language (TTSLanguages, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
+            speech_engine(TTSSpeechEngine, optional): The engine to use for the synthesis. Defaults to "generative".
+            text_type(TTSTextType, optional): Type of text to synthesize. Use "ssml" for SSML-enhanced text. Defaults to "text".
             sample_rate(int, optional): The audio frequency specified in Hz. Defaults to 16000.
-            speech_engine(TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
             region(str, optional): The region to use for the synthesis. Defaults to "us-east-1".
             api_key(str, optional): AWS access key id.
             api_secret(str, optional): AWS secret access key.
@@ -96,6 +100,7 @@ class TTS(tts.TTS):
         self._opts = _TTSOptions(
             voice=voice,
             speech_engine=speech_engine,
+            text_type=text_type,
             region=region or None,
             language=language or None,
             sample_rate=sample_rate,
@@ -130,7 +135,7 @@ class ChunkedStream(tts.ChunkedStream):
                             "OutputFormat": "mp3",
                             "Engine": self._opts.speech_engine,
                             "VoiceId": self._opts.voice,
-                            "TextType": "text",
+                            "TextType": self._opts.text_type,
                             "SampleRate": str(self._opts.sample_rate),
                             "LanguageCode": self._opts.language,
                         }

{livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/version.py RENAMED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.2.5"
+__version__ = "1.2.7"

{livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/pyproject.toml RENAMED Viewed

@@ -23,9 +23,9 @@ classifiers = [
     "Programming Language :: Python :: 3 :: Only",
 ]
 dependencies = [
-    "livekit-agents>=1.2.5",
+    "livekit-agents>=1.2.7",
     "aioboto3>=14.1.0",
-    "amazon-transcribe>=0.6.2",
+    "amazon-transcribe>=0.6.4",
 ]
 [project.optional-dependencies]