PyPI - livekit-plugins-aws - Versions diffs - 0.1.0__py3-none-any.whl → 1.0.0.dev4__py3-none-any.whl - Mend

livekit-plugins-aws: 0.1.0 (py3-none-any.whl) → 1.0.0.dev4 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-aws might be problematic. Click here for more details.

Files changed (12) [hide | show]

livekit/plugins/aws/llm.py +119 -199
livekit/plugins/aws/models.py +1 -1
livekit/plugins/aws/stt.py +34 -53
livekit/plugins/aws/tts.py +40 -41
livekit/plugins/aws/utils.py +135 -0
livekit/plugins/aws/version.py +1 -1
{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-1.0.0.dev4.dist-info}/METADATA +13 -23
livekit_plugins_aws-1.0.0.dev4.dist-info/RECORD +12 -0
{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-1.0.0.dev4.dist-info}/WHEEL +1 -2
livekit/plugins/aws/_utils.py +0 -216
livekit_plugins_aws-0.1.0.dist-info/RECORD +0 -13
livekit_plugins_aws-0.1.0.dist-info/top_level.txt +0 -1

livekit/plugins/aws/llm.py CHANGED Viewed

@@ -17,47 +17,50 @@ from __future__ import annotations
 import asyncio
 import os
 from dataclasses import dataclass
-from typing import Any, Literal, MutableSet, Union
+from typing import Any, Literal
 import boto3
-from livekit.agents import (
-    APIConnectionError,
-    APIStatusError,
-    llm,
+from livekit.agents import APIConnectionError, APIStatusError, llm
+from livekit.agents.llm import ChatContext, FunctionTool, FunctionToolCall, ToolChoice
+from livekit.agents.types import (
+    DEFAULT_API_CONNECT_OPTIONS,
+    NOT_GIVEN,
+    APIConnectOptions,
+    NotGivenOr,
 )
-from livekit.agents.llm import LLMCapabilities, ToolChoice, _create_ai_function_info
-from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions
+from livekit.agents.utils import is_given
-from ._utils import _build_aws_ctx, _build_tools, _get_aws_credentials
 from .log import logger
+from .utils import get_aws_credentials, to_chat_ctx, to_fnc_ctx
 TEXT_MODEL = Literal["anthropic.claude-3-5-sonnet-20241022-v2:0"]
 DEFAULT_REGION = "us-east-1"
 @dataclass
-class LLMOptions:
-    model: TEXT_MODEL | str
-    temperature: float | None
-    tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto"
-    max_output_tokens: int | None = None
-    top_p: float | None = None
-    additional_request_fields: dict[str, Any] | None = None
+class _LLMOptions:
+    model: str | TEXT_MODEL
+    temperature: NotGivenOr[float]
+    tool_choice: NotGivenOr[ToolChoice | Literal["auto", "required", "none"]]
+    max_output_tokens: NotGivenOr[int]
+    top_p: NotGivenOr[float]
+    additional_request_fields: NotGivenOr[dict[str, Any]]
 class LLM(llm.LLM):
     def __init__(
         self,
         *,
-        model: TEXT_MODEL | str = "anthropic.claude-3-5-sonnet-20240620-v1:0",
-        api_key: str | None = None,
-        api_secret: str | None = None,
-        region: str = "us-east-1",
-        temperature: float = 0.8,
-        max_output_tokens: int | None = None,
-        top_p: float | None = None,
-        tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]] = "auto",
-        additional_request_fields: dict[str, Any] | None = None,
+        model: NotGivenOr[str | TEXT_MODEL] = NOT_GIVEN,
+        api_key: NotGivenOr[str] = NOT_GIVEN,
+        api_secret: NotGivenOr[str] = NOT_GIVEN,
+        region: NotGivenOr[str] = NOT_GIVEN,
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        max_output_tokens: NotGivenOr[int] = NOT_GIVEN,
+        top_p: NotGivenOr[float] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice | Literal["auto", "required", "none"]] = NOT_GIVEN,
+        additional_request_fields: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
     ) -> None:
         """
         Create a new instance of AWS Bedrock LLM.
@@ -65,7 +68,7 @@ class LLM(llm.LLM):
         ``api_key``  and ``api_secret`` must be set to your AWS Access key id and secret access key, either using the argument or by setting the
         ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` environmental variables.
-        See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse_stream.html for more details on the the AWS Bedrock Runtime API.
+        See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse_stream.html for more details on the AWS Bedrock Runtime API.
         Args:
             model (TEXT_MODEL, optional): model or inference profile arn to use(https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-use.html). Defaults to 'anthropic.claude-3-5-sonnet-20240620-v1:0'.
@@ -78,64 +81,89 @@ class LLM(llm.LLM):
             tool_choice (ToolChoice or Literal["auto", "required", "none"], optional): Specifies whether to use tools during response generation. Defaults to "auto".
             additional_request_fields (dict[str, Any], optional): Additional request fields to send to the AWS Bedrock Converse API. Defaults to None.
         """
-        super().__init__(
-            capabilities=LLMCapabilities(
-                supports_choices_on_int=True,
-                requires_persistent_functions=True,
-            )
-        )
-        self._api_key, self._api_secret = _get_aws_credentials(
+        super().__init__()
+        self._api_key, self._api_secret, self._region = get_aws_credentials(
             api_key, api_secret, region
         )
-        self._model = model or os.environ.get("BEDROCK_INFERENCE_PROFILE_ARN")
-        if not self._model:
+        model = model or os.environ.get("BEDROCK_INFERENCE_PROFILE_ARN")
+        if not is_given(model):
             raise ValueError(
                 "model or inference profile arn must be set using the argument or by setting the BEDROCK_INFERENCE_PROFILE_ARN environment variable."
             )
-        self._opts = LLMOptions(
-            model=self._model,
+        self._opts = _LLMOptions(
+            model=model,
             temperature=temperature,
             tool_choice=tool_choice,
             max_output_tokens=max_output_tokens,
             top_p=top_p,
             additional_request_fields=additional_request_fields,
         )
-        self._region = region
-        self._running_fncs: MutableSet[asyncio.Task[Any]] = set()
     def chat(
         self,
         *,
-        chat_ctx: llm.ChatContext,
+        chat_ctx: ChatContext,
+        tools: list[FunctionTool] | None = None,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
-        fnc_ctx: llm.FunctionContext | None = None,
-        temperature: float | None = None,
-        n: int | None = 1,
-        parallel_tool_calls: bool | None = None,
-        tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]]
-        | None = None,
-    ) -> "LLMStream":
-        if tool_choice is None:
-            tool_choice = self._opts.tool_choice
-        if temperature is None:
-            temperature = self._opts.temperature
+        temperature: NotGivenOr[float] = NOT_GIVEN,
+        tool_choice: NotGivenOr[ToolChoice | Literal["auto", "required", "none"]] = NOT_GIVEN,
+    ) -> LLMStream:
+        opts = {}
+        if is_given(self._opts.model):
+            opts["modelId"] = self._opts.model
+        def _get_tool_config() -> dict[str, Any] | None:
+            nonlocal tool_choice
+            if not tools:
+                return None
+            tool_config: dict[str, Any] = {"tools": to_fnc_ctx(tools)}
+            tool_choice = tool_choice if is_given(tool_choice) else self._opts.tool_choice
+            if is_given(tool_choice):
+                if isinstance(tool_choice, ToolChoice):
+                    tool_config["toolChoice"] = {"tool": {"name": tool_choice.name}}
+                elif tool_choice == "required":
+                    tool_config["toolChoice"] = {"any": {}}
+                elif tool_choice == "auto":
+                    tool_config["toolChoice"] = {"auto": {}}
+                else:
+                    return None
+            return tool_config
+        tool_config = _get_tool_config()
+        if tool_config:
+            opts["toolConfig"] = tool_config
+        messages, system_message = to_chat_ctx(chat_ctx, id(self))
+        opts["messages"] = messages
+        if system_message:
+            opts["system"] = [system_message]
+        inference_config = {}
+        if is_given(self._opts.max_output_tokens):
+            inference_config["maxTokens"] = self._opts.max_output_tokens
+        temperature = temperature if is_given(temperature) else self._opts.temperature
+        if is_given(temperature):
+            inference_config["temperature"] = temperature
+        if is_given(self._opts.top_p):
+            inference_config["topP"] = self._opts.top_p
+        opts["inferenceConfig"] = inference_config
+        if is_given(self._opts.additional_request_fields):
+            opts["additionalModelRequestFields"] = self._opts.additional_request_fields
         return LLMStream(
             self,
-            model=self._opts.model,
             aws_access_key_id=self._api_key,
             aws_secret_access_key=self._api_secret,
             region_name=self._region,
-            max_output_tokens=self._opts.max_output_tokens,
-            top_p=self._opts.top_p,
-            additional_request_fields=self._opts.additional_request_fields,
             chat_ctx=chat_ctx,
-            fnc_ctx=fnc_ctx,
+            tools=tools,
             conn_options=conn_options,
-            temperature=temperature,
-            tool_choice=tool_choice,
+            extra_kwargs=opts,
         )
@@ -144,91 +172,33 @@ class LLMStream(llm.LLMStream):
         self,
         llm: LLM,
         *,
-        model: str | TEXT_MODEL,
-        aws_access_key_id: str | None,
-        aws_secret_access_key: str | None,
+        aws_access_key_id: str,
+        aws_secret_access_key: str,
         region_name: str,
-        chat_ctx: llm.ChatContext,
+        chat_ctx: ChatContext,
         conn_options: APIConnectOptions,
-        fnc_ctx: llm.FunctionContext | None,
-        temperature: float | None,
-        max_output_tokens: int | None,
-        top_p: float | None,
-        tool_choice: Union[ToolChoice, Literal["auto", "required", "none"]],
-        additional_request_fields: dict[str, Any] | None,
+        tools: list[FunctionTool] | None,
+        extra_kwargs: dict[str, Any],
     ) -> None:
-        super().__init__(
-            llm, chat_ctx=chat_ctx, fnc_ctx=fnc_ctx, conn_options=conn_options
-        )
+        super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
         self._client = boto3.client(
             "bedrock-runtime",
             region_name=region_name,
             aws_access_key_id=aws_access_key_id,
             aws_secret_access_key=aws_secret_access_key,
         )
-        self._model = model
         self._llm: LLM = llm
-        self._max_output_tokens = max_output_tokens
-        self._top_p = top_p
-        self._temperature = temperature
-        self._tool_choice = tool_choice
-        self._additional_request_fields = additional_request_fields
+        self._opts = extra_kwargs
-    async def _run(self) -> None:
         self._tool_call_id: str | None = None
         self._fnc_name: str | None = None
         self._fnc_raw_arguments: str | None = None
         self._text: str = ""
-        retryable = True
+    async def _run(self) -> None:
+        retryable = True
         try:
-            opts: dict[str, Any] = {}
-            messages, system_instruction = _build_aws_ctx(self._chat_ctx, id(self))
-            messages = _merge_messages(messages)
-            def _get_tool_config() -> dict[str, Any] | None:
-                if not (self._fnc_ctx and self._fnc_ctx.ai_functions):
-                    return None
-                tools = _build_tools(self._fnc_ctx)
-                config: dict[str, Any] = {"tools": tools}
-                if isinstance(self._tool_choice, ToolChoice):
-                    config["toolChoice"] = {"tool": {"name": self._tool_choice.name}}
-                elif self._tool_choice == "required":
-                    config["toolChoice"] = {"any": {}}
-                elif self._tool_choice == "auto":
-                    config["toolChoice"] = {"auto": {}}
-                else:
-                    return None
-                return config
-            tool_config = _get_tool_config()
-            if tool_config:
-                opts["toolConfig"] = tool_config
-            if self._additional_request_fields:
-                opts["additionalModelRequestFields"] = _strip_nones(
-                    self._additional_request_fields
-                )
-            if system_instruction:
-                opts["system"] = [system_instruction]
-            inference_config = _strip_nones(
-                {
-                    "maxTokens": self._max_output_tokens,
-                    "temperature": self._temperature,
-                    "topP": self._top_p,
-                }
-            )
-            response = self._client.converse_stream(
-                modelId=self._model,
-                messages=messages,
-                inferenceConfig=inference_config,
-                **_strip_nones(opts),
-            )  # type: ignore
+            response = self._client.converse_stream(**self._opts)  # type: ignore
             request_id = response["ResponseMetadata"]["RequestId"]
             if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
                 raise APIStatusError(
@@ -267,84 +237,34 @@ class LLMStream(llm.LLMStream):
         elif "contentBlockStop" in chunk:
             if self._text:
                 chat_chunk = llm.ChatChunk(
-                    request_id=request_id,
-                    choices=[
-                        llm.Choice(
-                            delta=llm.ChoiceDelta(content=self._text, role="assistant"),
-                            index=chunk["contentBlockStop"]["contentBlockIndex"],
-                        )
-                    ],
+                    id=request_id,
+                    delta=llm.ChoiceDelta(content=self._text, role="assistant"),
                 )
                 self._text = ""
                 return chat_chunk
             elif self._tool_call_id:
-                return self._try_build_function(request_id, chunk)
-        return None
-    def _try_build_function(self, request_id: str, chunk: dict) -> llm.ChatChunk | None:
-        if self._tool_call_id is None:
-            logger.warning("aws bedrock llm: no tool call id in the response")
-            return None
-        if self._fnc_name is None:
-            logger.warning("aws bedrock llm: no function name in the response")
-            return None
-        if self._fnc_raw_arguments is None:
-            logger.warning("aws bedrock llm: no function arguments in the response")
-            return None
-        if self._fnc_ctx is None:
-            logger.warning(
-                "aws bedrock llm: stream tried to run function without function context"
-            )
-            return None
-        fnc_info = _create_ai_function_info(
-            self._fnc_ctx,
-            self._tool_call_id,
-            self._fnc_name,
-            self._fnc_raw_arguments,
-        )
-        self._tool_call_id = self._fnc_name = self._fnc_raw_arguments = None
-        self._function_calls_info.append(fnc_info)
-        return llm.ChatChunk(
-            request_id=request_id,
-            choices=[
-                llm.Choice(
+                if self._tool_call_id is None:
+                    logger.warning("aws bedrock llm: no tool call id in the response")
+                    return None
+                if self._fnc_name is None:
+                    logger.warning("aws bedrock llm: no function name in the response")
+                    return None
+                if self._fnc_raw_arguments is None:
+                    logger.warning("aws bedrock llm: no function arguments in the response")
+                    return None
+                chat_chunk = llm.ChatChunk(
+                    id=request_id,
                     delta=llm.ChoiceDelta(
                         role="assistant",
-                        tool_calls=[fnc_info],
+                        tool_calls=[
+                            FunctionToolCall(
+                                arguments=self._fnc_raw_arguments,
+                                name=self._fnc_name,
+                                call_id=self._tool_call_id,
+                            ),
+                        ],
                     ),
-                    index=chunk["contentBlockStop"]["contentBlockIndex"],
                 )
-            ],
-        )
-def _merge_messages(
-    messages: list[dict],
-) -> list[dict]:
-    # Anthropic enforces alternating messages
-    combined_messages: list[dict] = []
-    for m in messages:
-        if len(combined_messages) == 0 or m["role"] != combined_messages[-1]["role"]:
-            combined_messages.append(m)
-            continue
-        last_message = combined_messages[-1]
-        if not isinstance(last_message["content"], list) or not isinstance(
-            m["content"], list
-        ):
-            logger.error("message content is not a list")
-            continue
-        last_message["content"].extend(m["content"])
-    if len(combined_messages) == 0 or combined_messages[0]["role"] != "user":
-        combined_messages.insert(0, {"role": "user", "content": [{"text": "(empty)"}]})
-    return combined_messages
-def _strip_nones(d: dict[str, Any]) -> dict[str, Any]:
-    return {k: v for k, v in d.items() if v is not None}
+                self._tool_call_id = self._fnc_name = self._fnc_raw_arguments = None
+                return chat_chunk
+        return None

livekit/plugins/aws/models.py CHANGED Viewed

@@ -45,4 +45,4 @@ TTS_LANGUAGE = Literal[
     "de-CH",
 ]
-TTS_OUTPUT_FORMAT = Literal["pcm", "mp3"]
+TTS_OUTPUT_FORMAT = Literal["mp3"]

livekit/plugins/aws/stt.py CHANGED Viewed

@@ -14,20 +14,15 @@ from __future__ import annotations
 import asyncio
 from dataclasses import dataclass
-from typing import Optional
 from amazon_transcribe.client import TranscribeStreamingClient
 from amazon_transcribe.model import Result, TranscriptEvent
 from livekit import rtc
-from livekit.agents import (
-    DEFAULT_API_CONNECT_OPTIONS,
-    APIConnectOptions,
-    stt,
-    utils,
-)
-from ._utils import _get_aws_credentials
+from livekit.agents import DEFAULT_API_CONNECT_OPTIONS, APIConnectOptions, stt, utils
 from .log import logger
+from .utils import get_aws_credentials
 @dataclass
@@ -36,16 +31,16 @@ class STTOptions:
     sample_rate: int
     language: str
     encoding: str
-    vocabulary_name: Optional[str]
-    session_id: Optional[str]
-    vocab_filter_method: Optional[str]
-    vocab_filter_name: Optional[str]
-    show_speaker_label: Optional[bool]
-    enable_channel_identification: Optional[bool]
-    number_of_channels: Optional[int]
-    enable_partial_results_stabilization: Optional[bool]
-    partial_results_stability: Optional[str]
-    language_model_name: Optional[str]
+    vocabulary_name: str | None
+    session_id: str | None
+    vocab_filter_method: str | None
+    vocab_filter_name: str | None
+    show_speaker_label: bool | None
+    enable_channel_identification: bool | None
+    number_of_channels: int | None
+    enable_partial_results_stabilization: bool | None
+    partial_results_stability: str | None
+    language_model_name: str | None
 class STT(stt.STT):
@@ -58,26 +53,24 @@ class STT(stt.STT):
         sample_rate: int = 48000,
         language: str = "en-US",
         encoding: str = "pcm",
-        vocabulary_name: Optional[str] = None,
-        session_id: Optional[str] = None,
-        vocab_filter_method: Optional[str] = None,
-        vocab_filter_name: Optional[str] = None,
-        show_speaker_label: Optional[bool] = None,
-        enable_channel_identification: Optional[bool] = None,
-        number_of_channels: Optional[int] = None,
-        enable_partial_results_stabilization: Optional[bool] = None,
-        partial_results_stability: Optional[str] = None,
-        language_model_name: Optional[str] = None,
+        vocabulary_name: str | None = None,
+        session_id: str | None = None,
+        vocab_filter_method: str | None = None,
+        vocab_filter_name: str | None = None,
+        show_speaker_label: bool | None = None,
+        enable_channel_identification: bool | None = None,
+        number_of_channels: int | None = None,
+        enable_partial_results_stabilization: bool | None = None,
+        partial_results_stability: str | None = None,
+        language_model_name: str | None = None,
     ):
-        super().__init__(
-            capabilities=stt.STTCapabilities(streaming=True, interim_results=True)
-        )
+        super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))
-        self._api_key, self._api_secret = _get_aws_credentials(
+        self._api_key, self._api_secret, self._speech_region = get_aws_credentials(
             api_key, api_secret, speech_region
         )
         self._config = STTOptions(
-            speech_region=speech_region,
+            speech_region=self._speech_region,
             language=language,
             sample_rate=sample_rate,
             encoding=encoding,
@@ -100,16 +93,14 @@ class STT(stt.STT):
         language: str | None,
         conn_options: APIConnectOptions,
     ) -> stt.SpeechEvent:
-        raise NotImplementedError(
-            "Amazon Transcribe does not support single frame recognition"
-        )
+        raise NotImplementedError("Amazon Transcribe does not support single frame recognition")
     def stream(
         self,
         *,
         language: str | None = None,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
-    ) -> "SpeechStream":
+    ) -> SpeechStream:
         return SpeechStream(
             stt=self,
             conn_options=conn_options,
@@ -124,9 +115,7 @@ class SpeechStream(stt.SpeechStream):
         opts: STTOptions,
         conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
     ) -> None:
-        super().__init__(
-            stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate
-        )
+        super().__init__(stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate)
         self._opts = opts
         self._client = TranscribeStreamingClient(region=self._opts.speech_region)
@@ -151,9 +140,7 @@ class SpeechStream(stt.SpeechStream):
         async def input_generator():
             async for frame in self._input_ch:
                 if isinstance(frame, rtc.AudioFrame):
-                    await stream.input_stream.send_audio_event(
-                        audio_chunk=frame.data.tobytes()
-                    )
+                    await stream.input_stream.send_audio_event(audio_chunk=frame.data.tobytes())
             await stream.input_stream.end_stream()
         @utils.log_exceptions(logger=logger)
@@ -184,9 +171,7 @@ class SpeechStream(stt.SpeechStream):
                     self._event_ch.send_nowait(
                         stt.SpeechEvent(
                             type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
-                            alternatives=[
-                                _streaming_recognize_response_to_speech_data(resp)
-                            ],
+                            alternatives=[_streaming_recognize_response_to_speech_data(resp)],
                         )
                     )
@@ -194,16 +179,12 @@ class SpeechStream(stt.SpeechStream):
                     self._event_ch.send_nowait(
                         stt.SpeechEvent(
                             type=stt.SpeechEventType.FINAL_TRANSCRIPT,
-                            alternatives=[
-                                _streaming_recognize_response_to_speech_data(resp)
-                            ],
+                            alternatives=[_streaming_recognize_response_to_speech_data(resp)],
                         )
                     )
             if not resp.is_partial:
-                self._event_ch.send_nowait(
-                    stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH)
-                )
+                self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
 def _streaming_recognize_response_to_speech_data(resp: Result) -> stt.SpeechData:

livekit/plugins/aws/tts.py CHANGED Viewed

@@ -14,11 +14,11 @@ from __future__ import annotations
 import asyncio
 from dataclasses import dataclass
-from typing import Any, Callable, Optional
+from typing import Any, Callable
 import aiohttp
 from aiobotocore.session import AioSession, get_session
-from livekit import rtc
 from livekit.agents import (
     APIConnectionError,
     APIConnectOptions,
@@ -28,11 +28,10 @@ from livekit.agents import (
     utils,
 )
-from ._utils import _get_aws_credentials
-from .models import TTS_LANGUAGE, TTS_OUTPUT_FORMAT, TTS_SPEECH_ENGINE
+from .models import TTS_LANGUAGE, TTS_SPEECH_ENGINE
+from .utils import get_aws_credentials
 TTS_NUM_CHANNELS: int = 1
-DEFAULT_OUTPUT_FORMAT: TTS_OUTPUT_FORMAT = "pcm"
 DEFAULT_SPEECH_ENGINE: TTS_SPEECH_ENGINE = "generative"
 DEFAULT_SPEECH_REGION = "us-east-1"
 DEFAULT_VOICE = "Ruth"
@@ -43,7 +42,6 @@ DEFAULT_SAMPLE_RATE = 16000
 class _TTSOptions:
     # https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html
     voice: str | None
-    output_format: TTS_OUTPUT_FORMAT
     speech_engine: TTS_SPEECH_ENGINE
     speech_region: str
     sample_rate: int
@@ -56,7 +54,6 @@ class TTS(tts.TTS):
         *,
         voice: str | None = DEFAULT_VOICE,
         language: TTS_LANGUAGE | str | None = None,
-        output_format: TTS_OUTPUT_FORMAT = DEFAULT_OUTPUT_FORMAT,
         speech_engine: TTS_SPEECH_ENGINE = DEFAULT_SPEECH_ENGINE,
         sample_rate: int = DEFAULT_SAMPLE_RATE,
         speech_region: str = DEFAULT_SPEECH_REGION,
@@ -75,7 +72,6 @@ class TTS(tts.TTS):
         Args:
             Voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
             language (TTS_LANGUAGE, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
-            output_format(TTS_OUTPUT_FORMAT, optional): The format in which the returned output will be encoded. Defaults to "pcm".
             sample_rate(int, optional): The audio frequency specified in Hz. Defaults to 16000.
             speech_engine(TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
             speech_region(str, optional): The region to use for the synthesis. Defaults to "us-east-1".
@@ -90,15 +86,14 @@ class TTS(tts.TTS):
             num_channels=TTS_NUM_CHANNELS,
         )
-        self._api_key, self._api_secret = _get_aws_credentials(
+        self._api_key, self._api_secret, self._speech_region = get_aws_credentials(
             api_key, api_secret, speech_region
         )
         self._opts = _TTSOptions(
             voice=voice,
-            output_format=output_format,
             speech_engine=speech_engine,
-            speech_region=speech_region,
+            speech_region=self._speech_region,
             language=language,
             sample_rate=sample_rate,
         )
@@ -116,8 +111,8 @@ class TTS(tts.TTS):
         self,
         text: str,
         *,
-        conn_options: Optional[APIConnectOptions] = None,
-    ) -> "ChunkedStream":
+        conn_options: APIConnectOptions | None = None,
+    ) -> ChunkedStream:
         return ChunkedStream(
             tts=self,
             text=text,
@@ -133,7 +128,7 @@ class ChunkedStream(tts.ChunkedStream):
         *,
         tts: TTS,
         text: str,
-        conn_options: Optional[APIConnectOptions] = None,
+        conn_options: APIConnectOptions | None = None,
         opts: _TTSOptions,
         get_client: Callable[[], Any],
     ) -> None:
@@ -149,7 +144,7 @@ class ChunkedStream(tts.ChunkedStream):
             async with self._get_client() as client:
                 params = {
                     "Text": self._input_text,
-                    "OutputFormat": self._opts.output_format,
+                    "OutputFormat": "mp3",
                     "Engine": self._opts.speech_engine,
                     "VoiceId": self._opts.voice,
                     "TextType": "text",
@@ -158,32 +153,36 @@ class ChunkedStream(tts.ChunkedStream):
                 }
                 response = await client.synthesize_speech(**_strip_nones(params))
                 if "AudioStream" in response:
-                    decoder = utils.codecs.Mp3StreamDecoder()
-                    async with response["AudioStream"] as resp:
-                        async for data, _ in resp.content.iter_chunks():
-                            if self._opts.output_format == "mp3":
-                                frames = decoder.decode_chunk(data)
-                                for frame in frames:
-                                    self._event_ch.send_nowait(
-                                        tts.SynthesizedAudio(
-                                            request_id=request_id,
-                                            segment_id=self._segment_id,
-                                            frame=frame,
-                                        )
-                                    )
-                            else:
-                                self._event_ch.send_nowait(
-                                    tts.SynthesizedAudio(
-                                        request_id=request_id,
-                                        segment_id=self._segment_id,
-                                        frame=rtc.AudioFrame(
-                                            data=data,
-                                            sample_rate=self._opts.sample_rate,
-                                            num_channels=1,
-                                            samples_per_channel=len(data) // 2,
-                                        ),
-                                    )
-                                )
+                    decoder = utils.codecs.AudioStreamDecoder(
+                        sample_rate=self._opts.sample_rate,
+                        num_channels=1,
+                    )
+                    # Create a task to push data to the decoder
+                    async def push_data():
+                        try:
+                            async with response["AudioStream"] as resp:
+                                async for data, _ in resp.content.iter_chunks():
+                                    decoder.push(data)
+                        finally:
+                            decoder.end_input()
+                    # Start pushing data to the decoder
+                    push_task = asyncio.create_task(push_data())
+                    try:
+                        # Create emitter and process decoded frames
+                        emitter = tts.SynthesizedAudioEmitter(
+                            event_ch=self._event_ch,
+                            request_id=request_id,
+                            segment_id=self._segment_id,
+                        )
+                        async for frame in decoder:
+                            emitter.push(frame)
+                        emitter.flush()
+                        await push_task
+                    finally:
+                        await utils.aio.gracefully_cancel(push_task)
         except asyncio.TimeoutError as e:
             raise APITimeoutError() from e

livekit/plugins/aws/utils.py ADDED Viewed

@@ -0,0 +1,135 @@
+from __future__ import annotations
+import json
+import os
+from typing import Any, cast
+import boto3
+from livekit.agents import llm
+from livekit.agents.llm import ChatContext, FunctionTool, ImageContent, utils
+__all__ = ["to_fnc_ctx", "to_chat_ctx", "get_aws_credentials"]
+def get_aws_credentials(api_key: str | None, api_secret: str | None, region: str | None):
+    region = region or os.environ.get("AWS_DEFAULT_REGION")
+    if not region:
+        raise ValueError(
+            "AWS_DEFAULT_REGION must be set via argument or the AWS_DEFAULT_REGION environment variable."
+        )
+    if api_key and api_secret:
+        session = boto3.Session(
+            aws_access_key_id=api_key,
+            aws_secret_access_key=api_secret,
+            region_name=region,
+        )
+    else:
+        session = boto3.Session(region_name=region)
+    credentials = session.get_credentials()
+    if not credentials or not credentials.access_key or not credentials.secret_key:
+        raise ValueError("No valid AWS credentials found.")
+    return cast(tuple[str, str, str], (credentials.access_key, credentials.secret_key, region))
+def to_fnc_ctx(fncs: list[FunctionTool]) -> list[dict]:
+    return [_build_tool_spec(fnc) for fnc in fncs]
+def to_chat_ctx(chat_ctx: ChatContext, cache_key: Any) -> tuple[list[dict], dict | None]:
+    messages: list[dict] = []
+    system_message: dict | None = None
+    current_role: str | None = None
+    current_content: list[dict] = []
+    for msg in chat_ctx.items:
+        if msg.type == "message" and msg.role == "system":
+            for content in msg.content:
+                if isinstance(content, str):
+                    system_message = {"text": content}
+            continue
+        if msg.type == "message":
+            role = "assistant" if msg.role == "assistant" else "user"
+        elif msg.type == "function_call":
+            role = "assistant"
+        elif msg.type == "function_call_output":
+            role = "user"
+        # if the effective role changed, finalize the previous turn.
+        if role != current_role:
+            if current_content and current_role is not None:
+                messages.append({"role": current_role, "content": current_content})
+            current_content = []
+            current_role = role
+        if msg.type == "message":
+            for content in msg.content:
+                if isinstance(content, str):
+                    current_content.append({"text": content})
+                elif isinstance(content, ImageContent):
+                    current_content.append(_build_image(content, cache_key))
+        elif msg.type == "function_call":
+            current_content.append(
+                {
+                    "toolUse": {
+                        "toolUseId": msg.call_id,
+                        "name": msg.name,
+                        "input": json.loads(msg.arguments or "{}"),
+                    }
+                }
+            )
+        elif msg.type == "function_call_output":
+            tool_response = {
+                "toolResult": {
+                    "toolUseId": msg.call_id,
+                    "content": [],
+                    "status": "success",
+                }
+            }
+            if isinstance(msg.output, dict):
+                tool_response["toolResult"]["content"].append({"json": msg.output})
+            elif isinstance(msg.output, str):
+                tool_response["toolResult"]["content"].append({"text": msg.output})
+            current_content.append(tool_response)
+    # Finalize the last message if there’s any content left
+    if current_role is not None and current_content:
+        messages.append({"role": current_role, "content": current_content})
+    # Ensure the message list starts with a "user" message
+    if not messages or messages[0]["role"] != "user":
+        messages.insert(0, {"role": "user", "content": [{"text": "(empty)"}]})
+    return messages, system_message
+def _build_tool_spec(fnc: FunctionTool) -> dict:
+    fnc = llm.utils.build_legacy_openai_schema(fnc, internally_tagged=True)
+    return {
+        "toolSpec": _strip_nones(
+            {
+                "name": fnc["name"],
+                "description": fnc["description"] if fnc["description"] else None,
+                "inputSchema": {"json": fnc["parameters"] if fnc["parameters"] else {}},
+            }
+        )
+    }
+def _build_image(image: ImageContent, cache_key: Any) -> dict:
+    img = utils.serialize_image(image)
+    if cache_key not in image._cache:
+        image._cache[cache_key] = img.data_bytes
+    return {
+        "image": {
+            "format": "jpeg",
+            "source": {"bytes": image._cache[cache_key]},
+        }
+    }
+def _strip_nones(d: dict) -> dict:
+    return {k: v for k, v in d.items() if v is not None}

livekit/plugins/aws/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "0.1.0"
+__version__ = "1.0.0.dev4"

{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-1.0.0.dev4.dist-info}/METADATA RENAMED Viewed

@@ -1,38 +1,28 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: livekit-plugins-aws
-Version: 0.1.0
+Version: 1.0.0.dev4
 Summary: LiveKit Agents Plugin for services from AWS
-Home-page: https://github.com/livekit/agents
-License: Apache-2.0
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
 Project-URL: Source, https://github.com/livekit/agents
-Keywords: webrtc,realtime,audio,video,livekit,aws
+Author-email: LiveKit <support@livekit.io>
+License-Expression: Apache-2.0
+Keywords: audio,aws,livekit,realtime,video,webrtc
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Topic :: Multimedia :: Sound/Audio
-Classifier: Topic :: Multimedia :: Video
-Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Multimedia :: Video
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
-Description-Content-Type: text/markdown
-Requires-Dist: livekit-agents>=0.12.0
 Requires-Dist: aiobotocore==2.19.0
-Requires-Dist: boto3==1.36.3
 Requires-Dist: amazon-transcribe>=0.6.2
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: keywords
-Dynamic: license
-Dynamic: project-url
-Dynamic: requires-dist
-Dynamic: requires-python
-Dynamic: summary
+Requires-Dist: boto3==1.36.3
+Requires-Dist: livekit-agents>=1.0.0.dev4
+Description-Content-Type: text/markdown
 # LiveKit Plugins AWS
@@ -50,4 +40,4 @@ pip install livekit-plugins-aws
 ## Pre-requisites
-You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
+You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.

livekit_plugins_aws-1.0.0.dev4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+livekit/plugins/aws/__init__.py,sha256=Ea-hK7QdutnwdZvvs9K2fiR8RWJqz2JcONxXnV1kXF0,977
+livekit/plugins/aws/llm.py,sha256=Mc910AREP7-FX1yEV1k_rViue_30Gy8qmp42VDAptSE,11011
+livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
+livekit/plugins/aws/models.py,sha256=Nf8RFmDulW7h03dG2lERTog3mgDK0TbLvW0eGOncuEE,704
+livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/aws/stt.py,sha256=hRulbbMXtYqYPuqo359ARWE0fYDy1PzMdpT-h2m1UsY,7575
+livekit/plugins/aws/tts.py,sha256=WA-KtEVF8dq4GZEbPWdY3azdHZRiHFyptesx7kh6Tio,7250
+livekit/plugins/aws/utils.py,sha256=Q62NpoJs3bLerMBlhW22L9xiZHgmtxK3-js7KbL0bkQ,4790
+livekit/plugins/aws/version.py,sha256=koM_bT4QbztrKQ60Gjg7V4oe99CuxgGcpuUtWMOEKqU,605
+livekit_plugins_aws-1.0.0.dev4.dist-info/METADATA,sha256=2GdpNgK-u87T1YW20JWZzR0r8iadqvQLr4NsNnOLUEo,1488
+livekit_plugins_aws-1.0.0.dev4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_aws-1.0.0.dev4.dist-info/RECORD,,

{livekit_plugins_aws-0.1.0.dist-info → livekit_plugins_aws-1.0.0.dev4.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,4 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: hatchling 1.27.0
 Root-Is-Purelib: true
 Tag: py3-none-any

livekit/plugins/aws/_utils.py DELETED Viewed

@@ -1,216 +0,0 @@
-from __future__ import annotations
-import base64
-import inspect
-import json
-import os
-from typing import Any, Dict, List, Optional, Tuple, get_args, get_origin
-import boto3
-from livekit import rtc
-from livekit.agents import llm, utils
-from livekit.agents.llm.function_context import _is_optional_type
-__all__ = ["_build_aws_ctx", "_build_tools", "_get_aws_credentials"]
-def _get_aws_credentials(
-    api_key: Optional[str], api_secret: Optional[str], region: Optional[str]
-):
-    region = region or os.environ.get("AWS_DEFAULT_REGION")
-    if not region:
-        raise ValueError(
-            "AWS_DEFAULT_REGION must be set using the argument or by setting the AWS_DEFAULT_REGION environment variable."
-        )
-    # If API key and secret are provided, create a session with them
-    if api_key and api_secret:
-        session = boto3.Session(
-            aws_access_key_id=api_key,
-            aws_secret_access_key=api_secret,
-            region_name=region,
-        )
-    else:
-        session = boto3.Session(region_name=region)
-    credentials = session.get_credentials()
-    if not credentials or not credentials.access_key or not credentials.secret_key:
-        raise ValueError("No valid AWS credentials found.")
-    return credentials.access_key, credentials.secret_key
-JSON_SCHEMA_TYPE_MAP: Dict[type, str] = {
-    str: "string",
-    int: "integer",
-    float: "number",
-    bool: "boolean",
-    dict: "object",
-    list: "array",
-}
-def _build_parameters(arguments: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    properties: Dict[str, dict] = {}
-    required: List[str] = []
-    for arg_name, arg_info in arguments.items():
-        prop = {}
-        if hasattr(arg_info, "description") and arg_info.description:
-            prop["description"] = arg_info.description
-        _, py_type = _is_optional_type(arg_info.type)
-        origin = get_origin(py_type)
-        if origin is list:
-            item_type = get_args(py_type)[0]
-            if item_type not in JSON_SCHEMA_TYPE_MAP:
-                raise ValueError(f"Unsupported type: {item_type}")
-            prop["type"] = "array"
-            prop["items"] = {"type": JSON_SCHEMA_TYPE_MAP[item_type]}
-            if hasattr(arg_info, "choices") and arg_info.choices:
-                prop["items"]["enum"] = list(arg_info.choices)
-        else:
-            if py_type not in JSON_SCHEMA_TYPE_MAP:
-                raise ValueError(f"Unsupported type: {py_type}")
-            prop["type"] = JSON_SCHEMA_TYPE_MAP[py_type]
-            if arg_info.choices:
-                prop["enum"] = list(arg_info.choices)
-        properties[arg_name] = prop
-        if arg_info.default is inspect.Parameter.empty:
-            required.append(arg_name)
-    if properties:
-        parameters = {"json": {"type": "object", "properties": properties}}
-        if required:
-            parameters["json"]["required"] = required
-        return parameters
-    return None
-def _build_tools(fnc_ctx: Any) -> List[dict]:
-    tools: List[dict] = []
-    for fnc_info in fnc_ctx.ai_functions.values():
-        parameters = _build_parameters(fnc_info.arguments)
-        func_decl = {
-            "toolSpec": {
-                "name": fnc_info.name,
-                "description": fnc_info.description,
-                "inputSchema": parameters
-                if parameters
-                else {"json": {"type": "object", "properties": {}}},
-            }
-        }
-        tools.append(func_decl)
-    return tools
-def _build_image(image: llm.ChatImage, cache_key: Any) -> dict:
-    if isinstance(image.image, str):
-        if image.image.startswith("data:image/jpeg;base64,"):
-            base64_data = image.image.split(",", 1)[1]
-            try:
-                image_bytes = base64.b64decode(base64_data)
-            except Exception as e:
-                raise ValueError("Invalid base64 data in image URL") from e
-            return {"image": {"format": "jpeg", "source": {"bytes": image_bytes}}}
-        else:
-            return {"image": {"format": "jpeg", "source": {"uri": image.image}}}
-    elif isinstance(image.image, rtc.VideoFrame):
-        if cache_key not in image._cache:
-            opts = utils.images.EncodeOptions()
-            if image.inference_width and image.inference_height:
-                opts.resize_options = utils.images.ResizeOptions(
-                    width=image.inference_width,
-                    height=image.inference_height,
-                    strategy="scale_aspect_fit",
-                )
-            image._cache[cache_key] = utils.images.encode(image.image, opts)
-        return {
-            "image": {
-                "format": "jpeg",
-                "source": {
-                    "bytes": image._cache[cache_key],
-                },
-            }
-        }
-    raise ValueError(f"Unsupported image type: {type(image.image)}")
-def _build_aws_ctx(
-    chat_ctx: llm.ChatContext, cache_key: Any
-) -> Tuple[List[dict], Optional[dict]]:
-    messages: List[dict] = []
-    system: Optional[dict] = None
-    current_role: Optional[str] = None
-    current_content: List[dict] = []
-    for msg in chat_ctx.messages:
-        if msg.role == "system":
-            if isinstance(msg.content, str):
-                system = {"text": msg.content}
-            continue
-        if msg.role == "assistant":
-            role = "assistant"
-        else:
-            role = "user"
-        if role != current_role:
-            if current_role is not None and current_content:
-                messages.append({"role": current_role, "content": current_content})
-            current_role = role
-            current_content = []
-        if msg.tool_calls:
-            for fnc in msg.tool_calls:
-                current_content.append(
-                    {
-                        "toolUse": {
-                            "toolUseId": fnc.tool_call_id,
-                            "name": fnc.function_info.name,
-                            "input": fnc.arguments,
-                        }
-                    }
-                )
-        if msg.role == "tool":
-            tool_response: dict = {
-                "toolResult": {
-                    "toolUseId": msg.tool_call_id,
-                    "content": [],
-                    "status": "success",
-                }
-            }
-            if isinstance(msg.content, dict):
-                tool_response["toolResult"]["content"].append({"json": msg.content})
-            elif isinstance(msg.content, str):
-                tool_response["toolResult"]["content"].append({"text": msg.content})
-            current_content.append(tool_response)
-        else:
-            if msg.content:
-                if isinstance(msg.content, str):
-                    current_content.append({"text": msg.content})
-                elif isinstance(msg.content, dict):
-                    current_content.append({"text": json.dumps(msg.content)})
-                elif isinstance(msg.content, list):
-                    for item in msg.content:
-                        if isinstance(item, str):
-                            current_content.append({"text": item})
-                        elif isinstance(item, llm.ChatImage):
-                            current_content.append(_build_image(item, cache_key))
-    if current_role is not None and current_content:
-        messages.append({"role": current_role, "content": current_content})
-    return messages, system

livekit_plugins_aws-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-livekit/plugins/aws/__init__.py,sha256=Ea-hK7QdutnwdZvvs9K2fiR8RWJqz2JcONxXnV1kXF0,977
-livekit/plugins/aws/_utils.py,sha256=iuDuQpPta4wLtgW1Wc2rHspZWoa7KZI76tujQIPY898,7411
-livekit/plugins/aws/llm.py,sha256=yUAiBCtb2jRB1_S9BNrILTMmDffvKOpDod802kYnPVM,13527
-livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
-livekit/plugins/aws/models.py,sha256=wb7AfN-z7qgtKMZnUbQsELi6wN8ha5exI3DH8z6Gz3M,711
-livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/aws/stt.py,sha256=eH7gKtdCjwki20Th6PrCsjjtH-zjXa8ZWu-cu_KaT80,7935
-livekit/plugins/aws/tts.py,sha256=miUYrhstJ7tcLkvJ-8Cpv1UCQxRSdOqaSC2tvHBh9WI,7800
-livekit/plugins/aws/version.py,sha256=vQH9cItKAVYAmrLbOntkbLqmxrUZrPiKb1TjkZ8jRKQ,600
-livekit_plugins_aws-0.1.0.dist-info/METADATA,sha256=FUzLRO0YcUvcIidEEq_EK7Lbp6yPYKjzT_BkclYNGhM,1686
-livekit_plugins_aws-0.1.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-livekit_plugins_aws-0.1.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
-livekit_plugins_aws-0.1.0.dist-info/RECORD,,

livekit_plugins_aws-0.1.0.dist-info/top_level.txt DELETED Viewed

@@ -1 +0,0 @@
-livekit

livekit-plugins-aws 0.1.0__py3-none-any.whl → 1.0.0.dev4__py3-none-any.whl

Potentially problematic release.

livekit-plugins-aws 0.1.0__py3-none-any.whl → 1.0.0.dev4__py3-none-any.whl