livekit-plugins-aws 1.2.4__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-aws might be problematic; consult the registry's advisory for this release for more details.

@@ -51,6 +51,8 @@ class _LLMOptions:
51
51
  max_output_tokens: NotGivenOr[int]
52
52
  top_p: NotGivenOr[float]
53
53
  additional_request_fields: NotGivenOr[dict[str, Any]]
54
+ cache_system: bool
55
+ cache_tools: bool
54
56
 
55
57
 
56
58
  class LLM(llm.LLM):
@@ -66,6 +68,8 @@ class LLM(llm.LLM):
66
68
  top_p: NotGivenOr[float] = NOT_GIVEN,
67
69
  tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
68
70
  additional_request_fields: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
71
+ cache_system: bool = False,
72
+ cache_tools: bool = False,
69
73
  session: aioboto3.Session | None = None,
70
74
  ) -> None:
71
75
  """
@@ -87,6 +91,8 @@ class LLM(llm.LLM):
87
91
  top_p (float, optional): The nucleus sampling probability for response generation. Defaults to None.
88
92
  tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
89
93
  additional_request_fields (dict[str, Any], optional): Additional request fields to send to the AWS Bedrock Converse API. Defaults to None.
94
+ cache_system (bool, optional): Caches system messages to reduce token usage. Defaults to False.
95
+ cache_tools (bool, optional): Caches tool definitions to reduce token usage. Defaults to False.
90
96
  session (aioboto3.Session, optional): Optional aioboto3 session to use.
91
97
  """ # noqa: E501
92
98
  super().__init__()
@@ -111,12 +117,18 @@ class LLM(llm.LLM):
111
117
  max_output_tokens=max_output_tokens,
112
118
  top_p=top_p,
113
119
  additional_request_fields=additional_request_fields,
120
+ cache_system=cache_system,
121
+ cache_tools=cache_tools,
114
122
  )
115
123
 
116
124
  @property
117
125
  def model(self) -> str:
118
126
  return self._opts.model
119
127
 
128
+ @property
129
+ def provider(self) -> str:
130
+ return "AWS Bedrock"
131
+
120
132
  def chat(
121
133
  self,
122
134
  *,
@@ -140,7 +152,11 @@ class LLM(llm.LLM):
140
152
  if not tools:
141
153
  return None
142
154
 
143
- tool_config: dict[str, Any] = {"tools": to_fnc_ctx(tools)}
155
+ tools_list = to_fnc_ctx(tools)
156
+ if self._opts.cache_tools:
157
+ tools_list.append({"cachePoint": {"type": "default"}})
158
+
159
+ tool_config: dict[str, Any] = {"tools": tools_list}
144
160
  tool_choice = (
145
161
  cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
146
162
  )
@@ -162,7 +178,12 @@ class LLM(llm.LLM):
162
178
  messages, extra_data = chat_ctx.to_provider_format(format="aws")
163
179
  opts["messages"] = messages
164
180
  if extra_data.system_messages:
165
- opts["system"] = [{"text": content} for content in extra_data.system_messages]
181
+ system_messages: list[dict[str, str | dict]] = [
182
+ {"text": content} for content in extra_data.system_messages
183
+ ]
184
+ if self._opts.cache_system:
185
+ system_messages.append({"cachePoint": {"type": "default"}})
186
+ opts["system"] = system_messages
166
187
 
167
188
  inference_config: dict[str, Any] = {}
168
189
  if is_given(self._opts.max_output_tokens):
@@ -46,3 +46,4 @@ TTSLanguages = Literal[
46
46
  ]
47
47
 
48
48
  TTSEncoding = Literal["mp3"]
49
+ TTSTextType = Literal["text", "ssml"]
@@ -16,10 +16,11 @@ import asyncio
16
16
  import os
17
17
  from dataclasses import dataclass
18
18
 
19
- from amazon_transcribe.auth import AwsCrtCredentialResolver
19
+ from amazon_transcribe.auth import AwsCrtCredentialResolver, CredentialResolver, Credentials
20
20
  from amazon_transcribe.client import TranscribeStreamingClient
21
21
  from amazon_transcribe.exceptions import BadRequestException
22
22
  from amazon_transcribe.model import Result, StartStreamTranscriptionEventStream, TranscriptEvent
23
+ from awscrt.auth import AwsCredentialsProvider # type: ignore[import-untyped]
23
24
 
24
25
  from livekit import rtc
25
26
  from livekit.agents import (
@@ -71,6 +72,7 @@ class STT(stt.STT):
71
72
  enable_partial_results_stabilization: NotGivenOr[bool] = NOT_GIVEN,
72
73
  partial_results_stability: NotGivenOr[str] = NOT_GIVEN,
73
74
  language_model_name: NotGivenOr[str] = NOT_GIVEN,
75
+ credentials: NotGivenOr[Credentials] = NOT_GIVEN,
74
76
  ):
75
77
  super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))
76
78
 
@@ -94,6 +96,20 @@ class STT(stt.STT):
94
96
  region=region,
95
97
  )
96
98
 
99
+ self._credentials = credentials if is_given(credentials) else None
100
+
101
+ @property
102
+ def model(self) -> str:
103
+ return (
104
+ self._config.language_model_name
105
+ if is_given(self._config.language_model_name)
106
+ else "unknown"
107
+ )
108
+
109
+ @property
110
+ def provider(self) -> str:
111
+ return "Amazon Transcribe"
112
+
97
113
  async def aclose(self) -> None:
98
114
  await super().aclose()
99
115
 
@@ -112,7 +128,9 @@ class STT(stt.STT):
112
128
  language: NotGivenOr[str] = NOT_GIVEN,
113
129
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
114
130
  ) -> SpeechStream:
115
- return SpeechStream(stt=self, conn_options=conn_options, opts=self._config)
131
+ return SpeechStream(
132
+ stt=self, conn_options=conn_options, opts=self._config, credentials=self._credentials
133
+ )
116
134
 
117
135
 
118
136
  class SpeechStream(stt.SpeechStream):
@@ -121,15 +139,37 @@ class SpeechStream(stt.SpeechStream):
121
139
  stt: STT,
122
140
  opts: STTOptions,
123
141
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
142
+ credentials: Credentials | None = None,
124
143
  ) -> None:
125
144
  super().__init__(stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate)
126
145
  self._opts = opts
146
+ self._credentials = credentials
147
+
148
+ def _credential_resolver(self) -> CredentialResolver:
149
+ if self._credentials is None:
150
+ return AwsCrtCredentialResolver(None) # type: ignore
151
+
152
+ credentials = self._credentials
153
+
154
+ class CustomAwsCrtCredentialResolver(CredentialResolver):
155
+ def __init__(self) -> None:
156
+ self._crt_resolver = AwsCredentialsProvider.new_static(
157
+ credentials.access_key_id,
158
+ credentials.secret_access_key,
159
+ credentials.session_token,
160
+ )
161
+
162
+ async def get_credentials(self) -> Credentials | None:
163
+ credentials = await asyncio.wrap_future(self._crt_resolver.get_credentials())
164
+ return credentials # type: ignore[no-any-return]
165
+
166
+ return CustomAwsCrtCredentialResolver()
127
167
 
128
168
  async def _run(self) -> None:
129
169
  while True:
130
170
  client = TranscribeStreamingClient(
131
171
  region=self._opts.region,
132
- credential_resolver=AwsCrtCredentialResolver(None), # type: ignore
172
+ credential_resolver=self._credential_resolver(),
133
173
  )
134
174
 
135
175
  live_config = {
@@ -192,7 +232,7 @@ class SpeechStream(stt.SpeechStream):
192
232
  self._event_ch.send_nowait(
193
233
  stt.SpeechEvent(
194
234
  type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
195
- alternatives=[_streaming_recognize_response_to_speech_data(resp)],
235
+ alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
196
236
  )
197
237
  )
198
238
 
@@ -200,20 +240,22 @@ class SpeechStream(stt.SpeechStream):
200
240
  self._event_ch.send_nowait(
201
241
  stt.SpeechEvent(
202
242
  type=stt.SpeechEventType.FINAL_TRANSCRIPT,
203
- alternatives=[_streaming_recognize_response_to_speech_data(resp)],
243
+ alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
204
244
  )
205
245
  )
206
246
 
207
247
  if not resp.is_partial:
208
248
  self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
209
249
 
250
+ def _streaming_recognize_response_to_speech_data(self, resp: Result) -> stt.SpeechData:
251
+ confidence = 0.0
252
+ if resp.alternatives and (items := resp.alternatives[0].items):
253
+ confidence = items[0].confidence or 0.0
210
254
 
211
- def _streaming_recognize_response_to_speech_data(resp: Result) -> stt.SpeechData:
212
- data = stt.SpeechData(
213
- language="en-US",
214
- start_time=resp.start_time if resp.start_time else 0.0,
215
- end_time=resp.end_time if resp.end_time else 0.0,
216
- text=resp.alternatives[0].transcript if resp.alternatives else "",
217
- )
218
-
219
- return data
255
+ return stt.SpeechData(
256
+ language=resp.language_code or self._opts.language,
257
+ start_time=resp.start_time if resp.start_time is not None else 0.0,
258
+ end_time=resp.end_time if resp.end_time is not None else 0.0,
259
+ text=resp.alternatives[0].transcript if resp.alternatives else "",
260
+ confidence=confidence,
261
+ )
@@ -13,6 +13,7 @@
13
13
  from __future__ import annotations
14
14
 
15
15
  from dataclasses import dataclass, replace
16
+ from typing import cast
16
17
 
17
18
  import aioboto3 # type: ignore
18
19
  import botocore # type: ignore
@@ -32,11 +33,12 @@ from livekit.agents.types import (
32
33
  )
33
34
  from livekit.agents.utils import is_given
34
35
 
35
- from .models import TTSLanguages, TTSSpeechEngine
36
+ from .models import TTSLanguages, TTSSpeechEngine, TTSTextType
36
37
  from .utils import _strip_nones
37
38
 
38
39
  DEFAULT_SPEECH_ENGINE: TTSSpeechEngine = "generative"
39
40
  DEFAULT_VOICE = "Ruth"
41
+ DEFAULT_TEXT_TYPE: TTSTextType = "text"
40
42
 
41
43
 
42
44
  @dataclass
@@ -47,6 +49,7 @@ class _TTSOptions:
47
49
  region: str | None
48
50
  sample_rate: int
49
51
  language: TTSLanguages | str | None
52
+ text_type: TTSTextType
50
53
 
51
54
 
52
55
  class TTS(tts.TTS):
@@ -56,6 +59,7 @@ class TTS(tts.TTS):
56
59
  voice: str = "Ruth",
57
60
  language: NotGivenOr[TTSLanguages | str] = NOT_GIVEN,
58
61
  speech_engine: TTSSpeechEngine = "generative",
62
+ text_type: TTSTextType = "text",
59
63
  sample_rate: int = 16000,
60
64
  region: str | None = None,
61
65
  api_key: str | None = None,
@@ -71,10 +75,11 @@ class TTS(tts.TTS):
71
75
  See https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html for more details on the AWS Polly TTS.
72
76
 
73
77
  Args:
74
- Voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
75
- language (TTS_LANGUAGE, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
78
+ voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
79
+ language (TTSLanguages, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
80
+ speech_engine(TTSSpeechEngine, optional): The engine to use for the synthesis. Defaults to "generative".
81
+ text_type(TTSTextType, optional): Type of text to synthesize. Use "ssml" for SSML-enhanced text. Defaults to "text".
76
82
  sample_rate(int, optional): The audio frequency specified in Hz. Defaults to 16000.
77
- speech_engine(TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
78
83
  region(str, optional): The region to use for the synthesis. Defaults to "us-east-1".
79
84
  api_key(str, optional): AWS access key id.
80
85
  api_secret(str, optional): AWS secret access key.
@@ -96,16 +101,42 @@ class TTS(tts.TTS):
96
101
  self._opts = _TTSOptions(
97
102
  voice=voice,
98
103
  speech_engine=speech_engine,
104
+ text_type=text_type,
99
105
  region=region or None,
100
106
  language=language or None,
101
107
  sample_rate=sample_rate,
102
108
  )
103
109
 
110
+ @property
111
+ def model(self) -> str:
112
+ return self._opts.speech_engine
113
+
114
+ @property
115
+ def provider(self) -> str:
116
+ return "Amazon Polly"
117
+
104
118
  def synthesize(
105
119
  self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
106
120
  ) -> ChunkedStream:
107
121
  return ChunkedStream(tts=self, text=text, conn_options=conn_options)
108
122
 
123
+ def update_options(
124
+ self,
125
+ *,
126
+ voice: NotGivenOr[str] = NOT_GIVEN,
127
+ language: NotGivenOr[str] = NOT_GIVEN,
128
+ speech_engine: NotGivenOr[TTSSpeechEngine] = NOT_GIVEN,
129
+ text_type: NotGivenOr[TTSTextType] = NOT_GIVEN,
130
+ ) -> None:
131
+ if is_given(voice):
132
+ self._opts.voice = voice
133
+ if is_given(language):
134
+ self._opts.language = language
135
+ if is_given(speech_engine):
136
+ self._opts.speech_engine = cast(TTSSpeechEngine, speech_engine)
137
+ if is_given(text_type):
138
+ self._opts.text_type = cast(TTSTextType, text_type)
139
+
109
140
 
110
141
  class ChunkedStream(tts.ChunkedStream):
111
142
  def __init__(
@@ -130,7 +161,7 @@ class ChunkedStream(tts.ChunkedStream):
130
161
  "OutputFormat": "mp3",
131
162
  "Engine": self._opts.speech_engine,
132
163
  "VoiceId": self._opts.voice,
133
- "TextType": "text",
164
+ "TextType": self._opts.text_type,
134
165
  "SampleRate": str(self._opts.sample_rate),
135
166
  "LanguageCode": self._opts.language,
136
167
  }
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.4"
15
+ __version__ = "1.3.2"
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-aws
3
- Version: 1.2.4
3
+ Version: 1.3.2
4
4
  Summary: LiveKit Agents Plugin for services from AWS
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
7
7
  Project-URL: Source, https://github.com/livekit/agents
8
8
  Author-email: LiveKit <hello@livekit.io>
9
9
  License-Expression: Apache-2.0
10
- Keywords: audio,aws,livekit,nova,realtime,sonic,video,webrtc
10
+ Keywords: ai,audio,aws,livekit,nova,realtime,sonic,video,voice
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Programming Language :: Python :: 3
@@ -19,10 +19,11 @@ Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
21
  Requires-Dist: aioboto3>=14.1.0
22
- Requires-Dist: amazon-transcribe>=0.6.2
23
- Requires-Dist: livekit-agents>=1.2.4
22
+ Requires-Dist: amazon-transcribe>=0.6.4
23
+ Requires-Dist: livekit-agents>=1.3.2
24
24
  Provides-Extra: realtime
25
25
  Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
26
+ Requires-Dist: aws-sdk-signers==0.0.3; (python_version >= '3.12') and extra == 'realtime'
26
27
  Requires-Dist: boto3>1.35.10; extra == 'realtime'
27
28
  Description-Content-Type: text/markdown
28
29
 
@@ -1,17 +1,17 @@
1
1
  livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
2
- livekit/plugins/aws/llm.py,sha256=SUPWhJTbQ6HZJEK7WYUADDo2BJZJl2EaRvfG05IobzU,12150
2
+ livekit/plugins/aws/llm.py,sha256=9adQTcg3hJA6XTw4xaRjCIKkxedbzpNBBW0Yub9pkhA,13001
3
3
  livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
4
- livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
4
+ livekit/plugins/aws/models.py,sha256=J4yzik9sR68RPZpR1ubRQ9hdn14D9IwA3KaRvAf5tAE,734
5
5
  livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/aws/stt.py,sha256=zlUrIVVYsSMhtVzXVxHTypW_E2YpMD9EAqvW3lZYj1c,8888
7
- livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
6
+ livekit/plugins/aws/stt.py,sha256=WXE25wXwCgKeTcoMe3AH9EUeomvvRtPpOTZ5JAfOUxk,10629
7
+ livekit/plugins/aws/tts.py,sha256=oav-XWf9ysVGCmERWej6BgACu8vsLbRo9vFGpo9N6Ec,7184
8
8
  livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
9
- livekit/plugins/aws/version.py,sha256=RNHljWBeimzzNkMMbX3wjVENjAQ3D1xYW3wp8ijSE3s,600
9
+ livekit/plugins/aws/version.py,sha256=fSSiY4SPcmDoVdQmTTNkDwxlIEOPaqT_xdow1m-W9JQ,600
10
10
  livekit/plugins/aws/experimental/realtime/__init__.py,sha256=mm_TGZc9QAWSO-VOO3PdE8Y5R6xlWckXRZuiFUIHa-Q,287
11
11
  livekit/plugins/aws/experimental/realtime/events.py,sha256=ltdGEipE3ZOkjn7K6rKN6WSCUPJkVg-S88mUmQ_V00s,15981
12
12
  livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
13
- livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=JkFv4LnlME17v-yXVvZiFdFyKHZBkKlOffmbUAd7qYw,60403
13
+ livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=ksdw7X-wm5wiDoCur9srYTRV2eVadHOjAIIanNS9dUo,64568
14
14
  livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=bcufaap-coeIYuK3ct1Is9W_UoefGYRmnJu7Mn5DCYU,6002
15
- livekit_plugins_aws-1.2.4.dist-info/METADATA,sha256=ExAhD6Tb7l1DuATSpAUjHXECCXHEzrcA43oNlmPmrFQ,1989
16
- livekit_plugins_aws-1.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- livekit_plugins_aws-1.2.4.dist-info/RECORD,,
15
+ livekit_plugins_aws-1.3.2.dist-info/METADATA,sha256=BdW-6sTdtruSTfvIgdDU_m1VQb2A6E4z9HaNSeVPpNQ,2081
16
+ livekit_plugins_aws-1.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ livekit_plugins_aws-1.3.2.dist-info/RECORD,,