PyPI - livekit-plugins-aws - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

livekit-plugins-aws 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-aws might be problematic. Click here for more details.

Files changed (7)

livekit/plugins/aws/models.py CHANGED Viewed

@@ -45,4 +45,4 @@ TTS_LANGUAGE = Literal[
     "de-CH",
 ]
-TTS_OUTPUT_FORMAT = Literal["pcm", "mp3"]
+TTS_OUTPUT_FORMAT = Literal["mp3"]

livekit/plugins/aws/tts.py CHANGED Viewed

@@ -18,7 +18,6 @@ from typing import Any, Callable, Optional
 import aiohttp
 from aiobotocore.session import AioSession, get_session
-from livekit import rtc
 from livekit.agents import (
     APIConnectionError,
     APIConnectOptions,
@@ -29,10 +28,9 @@ from livekit.agents import (
 )
 from ._utils import _get_aws_credentials
-from .models import TTS_LANGUAGE, TTS_OUTPUT_FORMAT, TTS_SPEECH_ENGINE
+from .models import TTS_LANGUAGE, TTS_SPEECH_ENGINE
 TTS_NUM_CHANNELS: int = 1
-DEFAULT_OUTPUT_FORMAT: TTS_OUTPUT_FORMAT = "pcm"
 DEFAULT_SPEECH_ENGINE: TTS_SPEECH_ENGINE = "generative"
 DEFAULT_SPEECH_REGION = "us-east-1"
 DEFAULT_VOICE = "Ruth"
@@ -43,7 +41,6 @@ DEFAULT_SAMPLE_RATE = 16000
 class _TTSOptions:
     # https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html
     voice: str | None
-    output_format: TTS_OUTPUT_FORMAT
     speech_engine: TTS_SPEECH_ENGINE
     speech_region: str
     sample_rate: int
@@ -56,7 +53,6 @@ class TTS(tts.TTS):
         *,
         voice: str | None = DEFAULT_VOICE,
         language: TTS_LANGUAGE | str | None = None,
-        output_format: TTS_OUTPUT_FORMAT = DEFAULT_OUTPUT_FORMAT,
         speech_engine: TTS_SPEECH_ENGINE = DEFAULT_SPEECH_ENGINE,
         sample_rate: int = DEFAULT_SAMPLE_RATE,
         speech_region: str = DEFAULT_SPEECH_REGION,
@@ -75,7 +71,6 @@ class TTS(tts.TTS):
         Args:
             Voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
             language (TTS_LANGUAGE, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
-            output_format(TTS_OUTPUT_FORMAT, optional): The format in which the returned output will be encoded. Defaults to "pcm".
             sample_rate(int, optional): The audio frequency specified in Hz. Defaults to 16000.
             speech_engine(TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
             speech_region(str, optional): The region to use for the synthesis. Defaults to "us-east-1".
@@ -96,7 +91,6 @@ class TTS(tts.TTS):
         self._opts = _TTSOptions(
             voice=voice,
-            output_format=output_format,
             speech_engine=speech_engine,
             speech_region=speech_region,
             language=language,
@@ -149,7 +143,7 @@ class ChunkedStream(tts.ChunkedStream):
             async with self._get_client() as client:
                 params = {
                     "Text": self._input_text,
-                    "OutputFormat": self._opts.output_format,
+                    "OutputFormat": "mp3",
                     "Engine": self._opts.speech_engine,
                     "VoiceId": self._opts.voice,
                     "TextType": "text",
@@ -158,32 +152,36 @@ class ChunkedStream(tts.ChunkedStream):
                 }
                 response = await client.synthesize_speech(**_strip_nones(params))
                 if "AudioStream" in response:
-                    decoder = utils.codecs.Mp3StreamDecoder()
-                    async with response["AudioStream"] as resp:
-                        async for data, _ in resp.content.iter_chunks():
-                            if self._opts.output_format == "mp3":
-                                frames = decoder.decode_chunk(data)
-                                for frame in frames:
-                                    self._event_ch.send_nowait(
-                                        tts.SynthesizedAudio(
-                                            request_id=request_id,
-                                            segment_id=self._segment_id,
-                                            frame=frame,
-                                        )
-                                    )
-                            else:
-                                self._event_ch.send_nowait(
-                                    tts.SynthesizedAudio(
-                                        request_id=request_id,
-                                        segment_id=self._segment_id,
-                                        frame=rtc.AudioFrame(
-                                            data=data,
-                                            sample_rate=self._opts.sample_rate,
-                                            num_channels=1,
-                                            samples_per_channel=len(data) // 2,
-                                        ),
-                                    )
-                                )
+                    decoder = utils.codecs.AudioStreamDecoder(
+                        sample_rate=self._opts.sample_rate,
+                        num_channels=1,
+                    )
+                    # Create a task to push data to the decoder
+                    async def push_data():
+                        try:
+                            async with response["AudioStream"] as resp:
+                                async for data, _ in resp.content.iter_chunks():
+                                    decoder.push(data)
+                        finally:
+                            decoder.end_input()
+                    # Start pushing data to the decoder
+                    push_task = asyncio.create_task(push_data())
+                    try:
+                        # Create emitter and process decoded frames
+                        emitter = tts.SynthesizedAudioEmitter(
+                            event_ch=self._event_ch,
+                            request_id=request_id,
+                            segment_id=self._segment_id,
+                        )
+                        async for frame in decoder:
+                            emitter.push(frame)
+                        emitter.flush()
+                        await push_task
+                    finally:
+                        await utils.aio.gracefully_cancel(push_task)
         except asyncio.TimeoutError as e:
             raise APITimeoutError() from e

livekit/plugins/aws/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.1.0"
+__version__ = "0.1.1"

{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: livekit-plugins-aws
-Version: 0.1.0
+Version: 0.1.1
 Summary: LiveKit Agents Plugin for services from AWS
 Home-page: https://github.com/livekit/agents
 License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3 :: Only
 Requires-Python: >=3.9.0
 Description-Content-Type: text/markdown
-Requires-Dist: livekit-agents>=0.12.0
+Requires-Dist: livekit-agents[codecs]<1.0.0,>=0.12.16
 Requires-Dist: aiobotocore==2.19.0
 Requires-Dist: boto3==1.36.3
 Requires-Dist: amazon-transcribe>=0.6.2

{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-0.1.1.dist-info}/RECORD RENAMED Viewed

@@ -2,12 +2,12 @@ livekit/plugins/aws/__init__.py,sha256=Ea-hK7QdutnwdZvvs9K2fiR8RWJqz2JcONxXnV1kX
 livekit/plugins/aws/_utils.py,sha256=iuDuQpPta4wLtgW1Wc2rHspZWoa7KZI76tujQIPY898,7411
 livekit/plugins/aws/llm.py,sha256=yUAiBCtb2jRB1_S9BNrILTMmDffvKOpDod802kYnPVM,13527
 livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
-livekit/plugins/aws/models.py,sha256=wb7AfN-z7qgtKMZnUbQsELi6wN8ha5exI3DH8z6Gz3M,711
+livekit/plugins/aws/models.py,sha256=Nf8RFmDulW7h03dG2lERTog3mgDK0TbLvW0eGOncuEE,704
 livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 livekit/plugins/aws/stt.py,sha256=eH7gKtdCjwki20Th6PrCsjjtH-zjXa8ZWu-cu_KaT80,7935
-livekit/plugins/aws/tts.py,sha256=miUYrhstJ7tcLkvJ-8Cpv1UCQxRSdOqaSC2tvHBh9WI,7800
-livekit/plugins/aws/version.py,sha256=vQH9cItKAVYAmrLbOntkbLqmxrUZrPiKb1TjkZ8jRKQ,600
-livekit_plugins_aws-0.1.0.dist-info/METADATA,sha256=FUzLRO0YcUvcIidEEq_EK7Lbp6yPYKjzT_BkclYNGhM,1686
-livekit_plugins_aws-0.1.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-livekit_plugins_aws-0.1.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_aws-0.1.0.dist-info/RECORD,,
+livekit/plugins/aws/tts.py,sha256=m2Z6VXyWsJebqzGTDqE39KvgkBgdQkZ731fuIjbszAY,7243
+livekit/plugins/aws/version.py,sha256=3-nEcobvIJfZdV4yNIRuYpAGQ3svREnYIv2ivxoIZcQ,600
+livekit_plugins_aws-0.1.1.dist-info/METADATA,sha256=9rnNMyDhecj1fQIbGcxvGo_I0cg7d_lI2xEf-tBMQfc,1702
+livekit_plugins_aws-0.1.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+livekit_plugins_aws-0.1.1.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
+livekit_plugins_aws-0.1.1.dist-info/RECORD,,

{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-0.1.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-0.1.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

livekit-plugins-aws 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

Potentially problematic release.

livekit-plugins-aws 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl