livekit-plugins-aws 1.1.5__tar.gz → 1.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of livekit-plugins-aws has been flagged as potentially problematic.
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/PKG-INFO +6 -2
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/README.md +4 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/experimental/realtime/events.py +20 -21
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/experimental/realtime/realtime_model.py +147 -76
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/experimental/realtime/turn_tracker.py +11 -20
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/llm.py +3 -1
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/version.py +1 -1
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/pyproject.toml +1 -1
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/.gitignore +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/__init__.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/experimental/realtime/__init__.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/experimental/realtime/pretty_printer.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/log.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/models.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/py.typed +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/stt.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/tts.py +0 -0
- {livekit_plugins_aws-1.1.5 → livekit_plugins_aws-1.1.7}/livekit/plugins/aws/utils.py +0 -0
```diff
--- livekit_plugins_aws-1.1.5/PKG-INFO
+++ livekit_plugins_aws-1.1.7/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-aws
-Version: 1.1.5
+Version: 1.1.7
 Summary: LiveKit Agents Plugin for services from AWS
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -20,7 +20,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: aioboto3>=14.1.0
 Requires-Dist: amazon-transcribe>=0.6.2
-Requires-Dist: livekit-agents>=1.1.5
+Requires-Dist: livekit-agents>=1.1.7
 Provides-Extra: realtime
 Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
 Requires-Dist: boto3>1.35.10; extra == 'realtime'
@@ -44,3 +44,7 @@ pip install livekit-plugins-aws[realtime]
 ## Pre-requisites
 
 You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
+
+## Example
+
+For an example of the realtime STS model, Nova Sonic, see: https://github.com/livekit/agents/blob/main/examples/voice_agents/realtime_joke_teller.py
```
```diff
--- livekit_plugins_aws-1.1.5/README.md
+++ livekit_plugins_aws-1.1.7/README.md
@@ -16,3 +16,7 @@ pip install livekit-plugins-aws[realtime]
 ## Pre-requisites
 
 You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
+
+## Example
+
+For an example of the realtime STS model, Nova Sonic, see: https://github.com/livekit/agents/blob/main/examples/voice_agents/realtime_joke_teller.py
```
```diff
--- livekit_plugins_aws-1.1.5/livekit/plugins/aws/experimental/realtime/events.py
+++ livekit_plugins_aws-1.1.7/livekit/plugins/aws/experimental/realtime/events.py
@@ -1,8 +1,8 @@
 import json
 import uuid
-from typing import Any, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union, cast
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel as _BaseModel, ConfigDict, Field
 
 from livekit.agents import llm
 
@@ -20,7 +20,7 @@ SAMPLE_SIZE_BITS = Literal[16]  # only supports 16-bit audio
 CHANNEL_COUNT = Literal[1]  # only supports monochannel audio
 
 
-class BaseModel(BaseModel):
+class BaseModel(_BaseModel):
     model_config = ConfigDict(populate_by_name=True, extra="forbid")
 
 
@@ -91,7 +91,7 @@ class Tool(BaseModel):
 
 
 class ToolConfiguration(BaseModel):
-    toolChoice: dict[str, dict[str, str]]
+    toolChoice: Optional[dict[str, dict[str, str]]] = None
    tools: list[Tool]
 
 
@@ -260,6 +260,8 @@ class SonicEventBuilder:
         else:
             return "other_event"
 
+        raise ValueError(f"Unknown event type: {json_data}")
+
     def create_text_content_block(
         self,
         content_name: str,
@@ -313,10 +315,18 @@ class SonicEventBuilder:
         if chat_ctx.items:
             logger.debug("initiating session with chat context")
             for item in chat_ctx.items:
+                if item.type != "message":
+                    continue
+
+                if (role := item.role.upper()) not in ["USER", "ASSISTANT", "SYSTEM"]:
+                    continue
+
                 ctx_content_name = str(uuid.uuid4())
                 init_events.extend(
                     self.create_text_content_block(
-                        ctx_content_name,
+                        ctx_content_name,
+                        cast(ROLE, role),
+                        "".join(c for c in item.content if isinstance(c, str)),
                     )
                 )
 
@@ -481,26 +491,15 @@ class SonicEventBuilder:
         sample_rate: SAMPLE_RATE_HERTZ,
         tool_configuration: Optional[Union[ToolConfiguration, dict[str, Any], str]] = None,
     ) -> str:
-        tool_configuration = tool_configuration or ToolConfiguration(tools=[])
-        for tool in tool_configuration.tools:
-            logger.debug(f"TOOL JSON SCHEMA: {tool.toolSpec.inputSchema}")
-        tool_objects = [
-            Tool(
-                toolSpec=ToolSpec(
-                    name=tool.toolSpec.name,
-                    description=tool.toolSpec.description,
-                    inputSchema=ToolInputSchema(json_=tool.toolSpec.inputSchema.json_),
-                )
-            )
-            for tool in tool_configuration.tools
-        ]
-
         if tool_configuration is None:
             tool_configuration = ToolConfiguration(tools=[])
         elif isinstance(tool_configuration, str):
-            tool_configuration = ToolConfiguration
+            tool_configuration = ToolConfiguration.model_validate_json(tool_configuration)
         elif isinstance(tool_configuration, dict):
-            tool_configuration = ToolConfiguration(
+            tool_configuration = ToolConfiguration.model_validate(tool_configuration)
+
+        for tool in tool_configuration.tools:
+            logger.debug(f"TOOL JSON SCHEMA: {tool.toolSpec.inputSchema}")
 
         tool_objects = list(tool_configuration.tools)
         event = Event(
```
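The events.py changes above lean on pydantic v2's built-in validators instead of hand-rolling `Tool` objects: `model_validate_json` handles the string form, `model_validate` the dict form, and `toolChoice` becomes optional. A minimal self-contained sketch of the pattern (the models here are simplified stand-ins, not the plugin's full definitions):

```python
from typing import Optional

from pydantic import BaseModel


class ToolSpec(BaseModel):
    name: str
    description: str


class ToolConfiguration(BaseModel):
    # Optional with a None default, mirroring the 1.1.7 change
    toolChoice: Optional[dict[str, dict[str, str]]] = None
    tools: list[ToolSpec]


raw = '{"tools": [{"name": "get_weather", "description": "Look up the weather"}]}'

# model_validate_json parses and validates a JSON string in one step
cfg = ToolConfiguration.model_validate_json(raw)

# model_validate does the same for an already-parsed dict
cfg2 = ToolConfiguration.model_validate({"tools": [], "toolChoice": {"auto": {}}})

assert cfg.toolChoice is None
assert cfg2.toolChoice == {"auto": {}}
```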
```diff
--- livekit_plugins_aws-1.1.5/livekit/plugins/aws/experimental/realtime/realtime_model.py
+++ livekit_plugins_aws-1.1.7/livekit/plugins/aws/experimental/realtime/realtime_model.py
@@ -1,5 +1,8 @@
+# mypy: disable-error-code=unused-ignore
+
 from __future__ import annotations
 
+import ast
 import asyncio
 import base64
 import json
@@ -10,7 +13,7 @@ import weakref
 from collections.abc import Iterator
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, Literal
+from typing import Any, Callable, Literal, cast
 
 import boto3
 from aws_sdk_bedrock_runtime.client import (
@@ -33,11 +36,9 @@ from smithy_core.aio.interfaces.identity import IdentityResolver
 from livekit import rtc
 from livekit.agents import (
     APIStatusError,
-    ToolError,
     llm,
     utils,
 )
-from livekit.agents.llm.realtime import RealtimeSession
 from livekit.agents.metrics import RealtimeModelMetrics
 from livekit.agents.types import NOT_GIVEN, NotGivenOr
 from livekit.agents.utils import is_given
@@ -150,12 +151,12 @@ class _ResponseGeneration:
     speculative_messages: dict[str, str] = field(default_factory=dict)
     tool_messages: dict[str, str] = field(default_factory=dict)
     output_text: str = ""  # agent ASR text
-    _created_timestamp: str = field(default_factory=datetime.now().isoformat())
+    _created_timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
     _first_token_timestamp: float | None = None
     _completed_timestamp: float | None = None
 
 
-class Boto3CredentialsResolver(IdentityResolver):
+class Boto3CredentialsResolver(IdentityResolver):  # type: ignore[misc]
     """IdentityResolver implementation that sources AWS credentials from boto3.
 
     The resolver delegates to the default boto3.Session() credential chain which
```
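The `_created_timestamp` fix in this hunk corrects a classic `dataclasses` mistake: `default_factory` must be a callable, but the old code passed the already-evaluated result of `datetime.now().isoformat()` (a plain `str`). A small reproduction of the failure and the fix:

```python
from dataclasses import dataclass, field
from datetime import datetime


@dataclass
class Broken:
    # datetime.now().isoformat() is evaluated once at class definition,
    # yielding a str; default_factory then tries to *call* that str
    created: str = field(default_factory=datetime.now().isoformat())


@dataclass
class Fixed:
    # the lambda defers evaluation, so every instance gets a fresh timestamp
    created: str = field(default_factory=lambda: datetime.now().isoformat())


Fixed()        # fine: current timestamp at instantiation time
try:
    Broken()   # TypeError: 'str' object is not callable
except TypeError as e:
    print(e)
```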
```diff
@@ -164,10 +165,10 @@ class Boto3CredentialsResolver(IdentityResolver):
     passed into Bedrock runtime clients.
     """
 
-    def __init__(self):
-        self.session = boto3.Session()
+    def __init__(self) -> None:
+        self.session = boto3.Session()  # type: ignore[attr-defined]
 
-    async def get_identity(self, **kwargs):
+    async def get_identity(self, **kwargs: Any) -> AWSCredentialsIdentity:
         """Asynchronously resolve AWS credentials.
 
         This method is invoked by the Bedrock runtime client whenever a new request needs to be
```
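`get_identity` now declares an `AWSCredentialsIdentity` return type. For orientation, this is roughly how such a resolver bridges boto3's default credential chain to an identity object; only the boto3 calls below are standard API, and the field names the real identity type expects are an assumption, not confirmed by this diff:

```python
import boto3


async def resolve_credentials() -> dict[str, str | None]:
    """Sketch: walk boto3's default chain (env vars, shared config,
    instance metadata, ...) the way Boto3CredentialsResolver does."""
    session = boto3.Session()
    creds = session.get_credentials()
    if creds is None:
        raise ValueError("no AWS credentials found")
    frozen = creds.get_frozen_credentials()  # immutable snapshot of the creds
    # the real resolver would wrap these values in AWSCredentialsIdentity(...)
    # before handing them to the Bedrock runtime client
    return {
        "access_key_id": frozen.access_key,
        "secret_access_key": frozen.secret_key,
        "session_token": frozen.token,
    }
```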
```diff
@@ -247,7 +248,7 @@ class RealtimeModel(llm.RealtimeModel):
         self.temperature = temperature
         self.top_p = top_p
         self._opts = _RealtimeOptions(
-            voice=voice if is_given(voice) else "tiffany",
+            voice=cast(VOICE_ID, voice) if is_given(voice) else "tiffany",
             temperature=temperature if is_given(temperature) else DEFAULT_TEMPERATURE,
             top_p=top_p if is_given(top_p) else DEFAULT_TOP_P,
             max_tokens=max_tokens if is_given(max_tokens) else DEFAULT_MAX_TOKENS,
@@ -295,7 +296,7 @@ class RealtimeSession(  # noqa: F811
         inference options and the Smithy Bedrock client configuration.
         """
         super().__init__(realtime_model)
-        self._realtime_model = realtime_model
+        self._realtime_model: RealtimeModel = realtime_model
         self._event_builder = seb(
             prompt_name=str(uuid.uuid4()),
             audio_content_name=str(uuid.uuid4()),
@@ -309,10 +310,10 @@ class RealtimeSession(  # noqa: F811
         self._audio_input_task = None
         self._stream_response = None
         self._bedrock_client = None
+        self._pending_tools: set[str] = set()
         self._is_sess_active = asyncio.Event()
         self._chat_ctx = llm.ChatContext.empty()
         self._tools = llm.ToolContext.empty()
-        self._tool_type_map = {}
         self._tool_results_ch = utils.aio.Chan[dict[str, str]]()
         self._tools_ready = asyncio.get_running_loop().create_future()
         self._instructions_ready = asyncio.get_running_loop().create_future()
@@ -341,16 +342,12 @@ class RealtimeSession(  # noqa: F811
             "other_event": self._handle_other_event,
         }
         self._turn_tracker = _TurnTracker(
-            self.emit,
+            cast(Callable[[str, Any], None], self.emit),
+            cast(Callable[[], None], self.emit_generation_event),
         )
 
-    def _current_generation_streams(
-        self,
-    ) -> tuple[utils.aio.Chan[llm.MessageGeneration], utils.aio.Chan[llm.FunctionCall]]:
-        return (self._current_generation.message_ch, self._current_generation.function_ch)
-
     @utils.log_exceptions(logger=logger)
-    def _initialize_client(self):
+    def _initialize_client(self) -> None:
         """Instantiate the Bedrock runtime client"""
         config = Config(
             endpoint_uri=f"https://bedrock-runtime.{self._realtime_model._opts.region}.amazonaws.com",
@@ -358,15 +355,16 @@ class RealtimeSession(  # noqa: F811
             aws_credentials_identity_resolver=Boto3CredentialsResolver(),
             http_auth_scheme_resolver=HTTPAuthSchemeResolver(),
             http_auth_schemes={"aws.auth#sigv4": SigV4AuthScheme()},
+            user_agent_extra="x-client-framework:livekit-plugins-aws[realtime]",
         )
         self._bedrock_client = BedrockRuntimeClient(config=config)
 
     @utils.log_exceptions(logger=logger)
-    async def _send_raw_event(self, event_json):
+    async def _send_raw_event(self, event_json: str) -> None:
         """Low-level helper that serialises event_json and forwards it to the bidirectional stream.
 
         Args:
-            event_json (
+            event_json (str): The JSON payload (already in Bedrock wire format) to queue.
 
         Raises:
             Exception: Propagates any failures returned by the Bedrock runtime client.
@@ -425,21 +423,21 @@ class RealtimeSession(  # noqa: F811
                input_schema = llm.utils.build_legacy_openai_schema(f, internally_tagged=True)[
                    "parameters"
                ]
-
-            else:
+            elif llm.tool_context.is_raw_function_tool(f):
                description = llm.tool_context.get_raw_function_info(f).raw_schema.get(
                    "description"
                )
                input_schema = llm.tool_context.get_raw_function_info(f).raw_schema[
                    "parameters"
                ]
-
+            else:
+                continue
 
            tool = Tool(
                toolSpec=ToolSpec(
                    name=name,
-                    description=description,
-                    inputSchema=ToolInputSchema(json_=json.dumps(input_schema)),
+                    description=description or "No description provided",
+                    inputSchema=ToolInputSchema(json_=json.dumps(input_schema)),  # type: ignore
                )
            )
            tools.append(tool)
@@ -455,7 +453,7 @@ class RealtimeSession(  # noqa: F811
         return tool_cfg
 
     @utils.log_exceptions(logger=logger)
-    async def initialize_streams(self, is_restart: bool = False):
+    async def initialize_streams(self, is_restart: bool = False) -> None:
         """Open the Bedrock bidirectional stream and spawn background worker tasks.
 
         This coroutine is idempotent and can be invoked again when recoverable
@@ -469,6 +467,7 @@ class RealtimeSession(  # noqa: F811
         if not self._bedrock_client:
             logger.info("Creating Bedrock client")
             self._initialize_client()
+        assert self._bedrock_client is not None, "bedrock_client is None"
 
         logger.info("Initializing Bedrock stream")
         self._stream_response = (
@@ -518,7 +517,7 @@ class RealtimeSession(  # noqa: F811
         self._chat_ctx.truncate(max_items=MAX_MESSAGES)
         init_events = self._event_builder.create_prompt_start_block(
             voice_id=self._realtime_model._opts.voice,
-            sample_rate=DEFAULT_OUTPUT_SAMPLE_RATE,
+            sample_rate=DEFAULT_OUTPUT_SAMPLE_RATE,  # type: ignore
             system_content=self._instructions,
             chat_ctx=self.chat_ctx,
             tool_configuration=self._serialize_tool_config(),
@@ -542,15 +541,16 @@ class RealtimeSession(  # noqa: F811
             self._is_sess_active.set()
             logger.debug("Stream initialized successfully")
         except Exception as e:
-            self._is_sess_active.set_exception(e)
             logger.debug(f"Failed to initialize stream: {str(e)}")
             raise
         return self
 
     @utils.log_exceptions(logger=logger)
-    def
+    def emit_generation_event(self) -> None:
         """Publish a llm.GenerationCreatedEvent to external subscribers."""
         logger.debug("Emitting generation event")
+        assert self._current_generation is not None, "current_generation is None"
+
         generation_ev = llm.GenerationCreatedEvent(
             message_stream=self._current_generation.message_ch,
             function_stream=self._current_generation.function_ch,
@@ -605,10 +605,12 @@ class RealtimeSession(  # noqa: F811
         """Handle text_output_content_start for both user and assistant roles."""
         log_event_data(event_data)
         role = event_data["event"]["contentStart"]["role"]
+        self._create_response_generation()
 
         # note: does not work if you emit llm.GCE too early (for some reason)
         if role == "USER":
-            self.
+            assert self._current_generation is not None, "current_generation is None"
+
             content_id = event_data["event"]["contentStart"]["contentId"]
             self._current_generation.user_messages[content_id] = self._current_generation.input_id
 
@@ -616,6 +618,8 @@ class RealtimeSession(  # noqa: F811
             role == "ASSISTANT"
             and "SPECULATIVE" in event_data["event"]["contentStart"]["additionalModelFields"]
         ):
+            assert self._current_generation is not None, "current_generation is None"
+
             text_content_id = event_data["event"]["contentStart"]["contentId"]
             self._current_generation.speculative_messages[text_content_id] = (
                 self._current_generation.response_id
@@ -633,10 +637,15 @@ class RealtimeSession(  # noqa: F811
             # this is b/c audio playback is desynced from text transcription
             # TODO: fix this; possibly via a playback timer
             idx = self._chat_ctx.find_insertion_index(created_at=time.time()) - 1
+            if idx < 0:
+                logger.warning("Barge-in DETECTED but no previous message found")
+                return
+
             logger.debug(
                 f"BARGE-IN DETECTED using idx: {idx} and chat_msg: {self._chat_ctx.items[idx]}"
             )
-            self._chat_ctx.items[idx].
+            if (item := self._chat_ctx.items[idx]).type == "message":
+                item.interrupted = True
             self._close_current_generation()
             return
 
@@ -661,27 +670,31 @@ class RealtimeSession(  # noqa: F811
         # note: this update is per utterance, not per turn
         self._update_chat_ctx(role="assistant", text_content=text_content)
 
-    def _update_chat_ctx(self, role:
+    def _update_chat_ctx(self, role: llm.ChatRole, text_content: str) -> None:
         """
         Update the chat context with the latest ASR text while guarding against model limitations:
         a) 40 total messages limit
         b) 1kB message size limit
         """
-
-        if
-
-
-
-
-
+        logger.debug(f"Updating chat context with role: {role} and text_content: {text_content}")
+        if len(self._chat_ctx.items) == 0:
+            self._chat_ctx.add_message(role=role, content=text_content)
+        else:
+            prev_utterance = self._chat_ctx.items[-1]
+            if prev_utterance.type == "message" and prev_utterance.role == role:
+                if isinstance(prev_content := prev_utterance.content[0], str) and (
+                    len(prev_content.encode("utf-8")) + len(text_content.encode("utf-8"))
+                    < MAX_MESSAGE_SIZE
+                ):
+                    prev_utterance.content[0] = "\n".join([prev_content, text_content])
+                else:
+                    self._chat_ctx.add_message(role=role, content=text_content)
+                    if len(self._chat_ctx.items) > MAX_MESSAGES:
+                        self._chat_ctx.truncate(max_items=MAX_MESSAGES)
             else:
                 self._chat_ctx.add_message(role=role, content=text_content)
                 if len(self._chat_ctx.items) > MAX_MESSAGES:
                     self._chat_ctx.truncate(max_items=MAX_MESSAGES)
-        else:
-            self._chat_ctx.add_message(role=role, content=text_content)
-            if len(self._chat_ctx.items) > MAX_MESSAGES:
-                self._chat_ctx.truncate(max_items=MAX_MESSAGES)
 
     # cannot rely on this event for user b/c stopReason=PARTIAL_TURN always for user
     async def _handle_text_output_content_end_event(self, event_data: dict) -> None:
```
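The rewritten `_update_chat_ctx` above merges consecutive same-role utterances until the combined payload would cross the 1 kB per-message cap, then falls back to appending a new message and truncating to the 40-message window. Note that the size check counts UTF-8 bytes, not characters. The merge rule, distilled into a standalone sketch:

```python
MAX_MESSAGE_SIZE = 1024  # bytes, the per-message limit named in the docstring
MAX_MESSAGES = 40        # the total-message limit


def merge_or_append(history: list[tuple[str, str]], role: str, text: str) -> None:
    """Sketch: extend the previous message when roles match and the combined
    UTF-8 payload stays under the cap, otherwise append a new message."""
    if history and history[-1][0] == role:
        prev = history[-1][1]
        # byte length matters: 'é' is one code point but two UTF-8 bytes
        if len(prev.encode("utf-8")) + len(text.encode("utf-8")) < MAX_MESSAGE_SIZE:
            history[-1] = (role, "\n".join([prev, text]))
            return
    history.append((role, text))
    del history[:-MAX_MESSAGES]  # keep only the newest 40 messages


h: list[tuple[str, str]] = []
merge_or_append(h, "user", "hello")
merge_or_append(h, "user", "are you there?")
assert h == [("user", "hello\nare you there?")]
```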
```diff
@@ -701,6 +714,8 @@ class RealtimeSession(  # noqa: F811
     async def _handle_tool_output_content_start_event(self, event_data: dict) -> None:
         """Track mapping content_id -> response_id for upcoming tool use."""
         log_event_data(event_data)
+        assert self._current_generation is not None, "current_generation is None"
+
         tool_use_content_id = event_data["event"]["contentStart"]["contentId"]
         self._current_generation.tool_messages[tool_use_content_id] = (
             self._current_generation.response_id
@@ -710,6 +725,8 @@ class RealtimeSession(  # noqa: F811
     async def _handle_tool_output_content_event(self, event_data: dict) -> None:
         """Execute the referenced tool locally and forward results back to Bedrock."""
         log_event_data(event_data)
+        assert self._current_generation is not None, "current_generation is None"
+
         tool_use_content_id = event_data["event"]["toolUse"]["contentId"]
         tool_use_id = event_data["event"]["toolUse"]["toolUseId"]
         tool_name = event_data["event"]["toolUse"]["toolName"]
@@ -719,34 +736,38 @@ class RealtimeSession(  # noqa: F811
         ):
             args = event_data["event"]["toolUse"]["content"]
             self._current_generation.function_ch.send_nowait(
-                llm.FunctionCall(
-                    call_id=tool_use_id,
-                    name=tool_name,
-                    arguments=args,
-                )
+                llm.FunctionCall(call_id=tool_use_id, name=tool_name, arguments=args)
             )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            self._pending_tools.add(tool_use_id)
+
+            # performing these acrobatics in order to release the deadlock
+            # LiveKit will not accept a new generation until the previous one is closed
+            # the issue is that audio data cannot be generated until toolResult is received
+            # however, toolResults only arrive after update_chat_ctx() is invoked
+            # which will only occur after agent speech has completed
+            # therefore we introduce an artificial turn to trigger update_chat_ctx()
+            # TODO: this is messy-- investigate if there is a better way to handle this
+            curr_gen = self._current_generation.messages[self._current_generation.response_id]
+            curr_gen.audio_ch.close()
+            curr_gen.text_ch.close()
+            self._current_generation.message_ch.close()
+            self._current_generation.message_ch = utils.aio.Chan()
+            self._current_generation.function_ch.close()
+            self._current_generation.function_ch = utils.aio.Chan()
+            msg_gen = _MessageGeneration(
+                message_id=self._current_generation.response_id,
+                text_ch=utils.aio.Chan(),
+                audio_ch=utils.aio.Chan(),
+            )
+            self._current_generation.messages[self._current_generation.response_id] = msg_gen
+            self._current_generation.message_ch.send_nowait(
+                llm.MessageGeneration(
+                    message_id=msg_gen.message_id,
+                    text_stream=msg_gen.text_ch,
+                    audio_stream=msg_gen.audio_ch,
+                )
             )
+            self.emit_generation_event()
 
     async def _handle_tool_output_content_end_event(self, event_data: dict) -> None:
         log_event_data(event_data)
```
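The "acrobatics" comment marks the trickiest change in this release: the session closes the live text/audio/message/function channels so LiveKit treats the current generation as finished, then immediately swaps in fresh channels for the continuation that follows the tool result. Reduced to its essentials, the maneuver is a close-and-replace on the stream objects (sketch below uses a toy `Chan` stand-in for livekit's `utils.aio.Chan`):

```python
import asyncio
from typing import Any


class Chan:
    """Minimal stand-in for livekit's utils.aio.Chan as used in the diff."""

    def __init__(self) -> None:
        self._q: asyncio.Queue[Any] = asyncio.Queue()
        self.closed = False

    def send_nowait(self, item: Any) -> None:
        assert not self.closed, "cannot send on a closed channel"
        self._q.put_nowait(item)

    def close(self) -> None:
        self.closed = True


class Generation:
    def __init__(self) -> None:
        self.message_ch = Chan()
        self.function_ch = Chan()


def start_artificial_turn(gen: Generation) -> None:
    # close the current turn's streams so the consumer treats it as done...
    gen.message_ch.close()
    gen.function_ch.close()
    # ...then swap in fresh channels for the post-tool-call continuation
    gen.message_ch = Chan()
    gen.function_ch = Chan()
```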
```diff
@@ -804,6 +825,14 @@ class RealtimeSession(  # noqa: F811
         if not curr_gen.text_ch.closed:
             curr_gen.text_ch.close()
 
+        # TODO: seems not needed, tool_messages[id] is a str, function_ch is closed below?
+        # if self._current_generation.response_id in self._current_generation.tool_messages:
+        #     curr_gen = self._current_generation.tool_messages[
+        #         self._current_generation.response_id
+        #     ]
+        #     if not curr_gen.function_ch.closed:
+        #         curr_gen.function_ch.close()
+
         if not self._current_generation.message_ch.closed:
             self._current_generation.message_ch.close()
         if not self._current_generation.function_ch.closed:
@@ -855,10 +884,11 @@ class RealtimeSession(  # noqa: F811
         self.emit("metrics_collected", metrics)
 
     @utils.log_exceptions(logger=logger)
-    async def _process_responses(self):
+    async def _process_responses(self) -> None:
         """Background task that drains Bedrock's output stream and feeds the event handlers."""
         try:
             await self._is_sess_active.wait()
+            assert self._stream_response is not None, "stream_response is None"
 
             # note: may need another signal here to block input task until bedrock is ready
             # TODO: save this as a field so we're not re-awaiting it every time
@@ -892,7 +922,6 @@ class RealtimeSession(  # noqa: F811
 
             else:
                 logger.error(f"Validation error: {ve}")
-                request_id = ve.split(" ")[0].split("=")[1]
                 self.emit(
                     "error",
                     llm.RealtimeModelError(
@@ -901,7 +930,7 @@ class RealtimeSession(  # noqa: F811
                         error=APIStatusError(
                             message=ve.message,
                             status_code=400,
-                            request_id=
+                            request_id="",
                             body=ve,
                             retryable=False,
                         ),
@@ -940,7 +969,7 @@ class RealtimeSession(  # noqa: F811
                     timestamp=time.monotonic(),
                     label=self._realtime_model._label,
                     error=APIStatusError(
-                        message=
+                        message=err_msg,
                         status_code=500,
                         request_id=request_id,
                         body=e,
@@ -1014,6 +1043,25 @@ class RealtimeSession(  # noqa: F811
         logger.debug(f"Chat context updated: {self._chat_ctx.items}")
         self._chat_ctx_ready.set_result(True)
 
+        # for each function tool, send the result to aws
+        for item in chat_ctx.items:
+            if item.type != "function_call_output":
+                continue
+
+            if item.call_id not in self._pending_tools:
+                continue
+
+            logger.debug(f"function call output: {item}")
+            self._pending_tools.discard(item.call_id)
+            self._tool_results_ch.send_nowait(
+                {
+                    "tool_use_id": item.call_id,
+                    "tool_result": item.output
+                    if not item.is_error
+                    else f"{{'error': '{item.output}'}}",
+                }
+            )
+
     async def _send_tool_events(self, tool_use_id: str, tool_result: str) -> None:
         """Send tool_result back to Bedrock, grouped under tool_use_id."""
         tool_content_name = str(uuid.uuid4())
@@ -1026,7 +1074,9 @@ class RealtimeSession(  # noqa: F811
         await self._send_raw_event(event)
         # logger.debug(f"Sent tool event: {event}")
 
-    def _tool_choice_adapter(
+    def _tool_choice_adapter(
+        self, tool_choice: llm.ToolChoice | None
+    ) -> dict[str, dict[str, str]] | None:
         """Translate the LiveKit ToolChoice enum into Sonic's JSON schema."""
         if tool_choice == "auto":
             return {"auto": {}}
@@ -1079,7 +1129,7 @@ class RealtimeSession(  # noqa: F811
             yield frame
 
     @utils.log_exceptions(logger=logger)
-    async def _process_audio_input(self):
+    async def _process_audio_input(self) -> None:
         """Background task that feeds audio and tool results into the Bedrock stream."""
         await self._send_raw_event(self._event_builder.create_audio_content_start_event())
         logger.info("Starting audio input processing loop")
@@ -1090,6 +1140,19 @@ class RealtimeSession(  # noqa: F811
                 val = self._tool_results_ch.recv_nowait()
                 tool_result = val["tool_result"]
                 tool_use_id = val["tool_use_id"]
+                if not isinstance(tool_result, str):
+                    tool_result = json.dumps(tool_result)
+                else:
+                    try:
+                        json.loads(tool_result)
+                    except json.JSONDecodeError:
+                        try:
+                            tool_result = json.dumps(ast.literal_eval(tool_result))
+                        except Exception:
+                            # return the original value
+                            pass
+
+                logger.debug(f"Sending tool result: {tool_result}")
                 await self._send_tool_events(tool_use_id, tool_result)
 
             except utils.aio.channel.ChanEmpty:
```
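The new block normalizes tool results into valid JSON before they go upstream: non-strings are serialized outright, strings that already parse as JSON pass through, and Python-literal strings (e.g. single-quoted dict reprs) are rescued via `ast.literal_eval`. The same logic as a standalone helper:

```python
import ast
import json
from typing import Any


def normalize_tool_result(tool_result: Any) -> Any:
    if not isinstance(tool_result, str):
        return json.dumps(tool_result)   # e.g. {"a": 1} -> '{"a": 1}'
    try:
        json.loads(tool_result)          # already valid JSON? pass through
        return tool_result
    except json.JSONDecodeError:
        try:
            # "{'a': 1}" is a Python repr, not JSON; literal_eval recovers it
            return json.dumps(ast.literal_eval(tool_result))
        except Exception:
            return tool_result           # give up: send the raw string


assert normalize_tool_result({"a": 1}) == '{"a": 1}'
assert normalize_tool_result('{"a": 1}') == '{"a": 1}'
assert normalize_tool_result("{'a': 1}") == '{"a": 1}'
assert normalize_tool_result("plain text") == "plain text"
```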
```diff
@@ -1152,6 +1215,11 @@ class RealtimeSession(  # noqa: F811
         instructions: NotGivenOr[str] = NOT_GIVEN,
     ) -> asyncio.Future[llm.GenerationCreatedEvent]:
         logger.warning("unprompted generation is not supported by Nova Sonic's Realtime API")
+        fut = asyncio.Future[llm.GenerationCreatedEvent]()
+        fut.set_exception(
+            llm.RealtimeError("unprompted generation is not supported by Nova Sonic's Realtime API")
+        )
+        return fut
 
     def commit_audio(self) -> None:
         logger.warning("commit_audio is not supported by Nova Sonic's Realtime API")
```
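`generate_reply` now returns a future that already carries a `RealtimeError` instead of only logging a warning, so a caller that awaits the result fails immediately rather than hanging on a future that is never resolved. The pattern in isolation (an ordinary `RuntimeError` stands in for `llm.RealtimeError`):

```python
import asyncio


async def main() -> None:
    # a pre-failed future: set_exception before anyone awaits it
    fut: asyncio.Future[str] = asyncio.get_running_loop().create_future()
    fut.set_exception(RuntimeError("unprompted generation is not supported"))

    try:
        await fut
    except RuntimeError as e:
        print(f"caller sees the error immediately: {e}")


asyncio.run(main())
```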
```diff
@@ -1190,19 +1258,22 @@ class RealtimeSession(  # noqa: F811
         # resulting in an error after cancellation
         # however, it's mostly cosmetic-- the event loop will still exit
         # TODO: fix this nit
+        tasks: list[asyncio.Task[Any]] = []
         if self._response_task:
             try:
                 await asyncio.wait_for(self._response_task, timeout=1.0)
             except asyncio.TimeoutError:
                 logger.warning("shutdown of output event loop timed out-- cancelling")
                 self._response_task.cancel()
+                tasks.append(self._response_task)
 
         # must cancel the audio input task before closing the input stream
         if self._audio_input_task and not self._audio_input_task.done():
             self._audio_input_task.cancel()
+            tasks.append(self._audio_input_task)
         if self._stream_response and not self._stream_response.input_stream.closed:
             await self._stream_response.input_stream.close()
 
-        await asyncio.gather(
+        await asyncio.gather(*tasks, return_exceptions=True)
         logger.debug(f"CHAT CONTEXT: {self._chat_ctx.items}")
         logger.info("Session end")
```
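The shutdown path now collects every task it cancels and awaits them with `return_exceptions=True`, which consumes the pending `CancelledError`s instead of letting them surface as "exception was never retrieved" warnings. A condensed illustration:

```python
import asyncio
from typing import Any


async def worker() -> None:
    await asyncio.sleep(3600)  # simulates the long-running response/audio loops


async def shutdown() -> None:
    tasks: list[asyncio.Task[Any]] = [asyncio.create_task(worker()) for _ in range(2)]
    await asyncio.sleep(0)  # let the workers start and suspend
    for t in tasks:
        t.cancel()
    # return_exceptions=True captures the CancelledErrors in the result list
    # instead of re-raising them out of gather()
    results = await asyncio.gather(*tasks, return_exceptions=True)
    assert all(isinstance(r, asyncio.CancelledError) for r in results)


asyncio.run(shutdown())
```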
```diff
--- livekit_plugins_aws-1.1.5/livekit/plugins/aws/experimental/realtime/turn_tracker.py
+++ livekit_plugins_aws-1.1.7/livekit/plugins/aws/experimental/realtime/turn_tracker.py
@@ -6,7 +6,7 @@ import uuid
 from dataclasses import dataclass, field
 from typing import Any, Callable
 
-from livekit.agents import llm, utils
+from livekit.agents import llm
 
 from ...log import logger
 
@@ -34,7 +34,7 @@ class _Turn:
     ev_trans_completed: bool = False
     ev_generation_sent: bool = False
 
-    def add_partial_text(self, text: str):
+    def add_partial_text(self, text: str) -> None:
         self.transcript.append(text)
 
     @property
@@ -46,19 +46,17 @@ class _TurnTracker:
     def __init__(
         self,
         emit_fn: Callable[[str, Any], None],
-
-            [], tuple[utils.aio.Chan[llm.MessageGeneration], utils.aio.Chan[llm.FunctionCall]]
-        ],
+        emit_generation_fn: Callable[[], None],
     ):
         self._emit = emit_fn
         self._turn_idx = 0
         self._curr_turn: _Turn | None = None
-        self.
+        self._emit_generation_fn = emit_generation_fn
 
     # --------------------------------------------------------
     # PUBLIC ENTRY POINT
     # --------------------------------------------------------
-    def feed(self, event: dict):
+    def feed(self, event: dict) -> None:
         turn = self._ensure_turn()
         kind = _classify(event)
 
@@ -97,13 +95,13 @@ class _TurnTracker:
         self._curr_turn = _Turn(turn_id=self._turn_idx)
         return self._curr_turn
 
-    def _maybe_emit_input_started(self, turn: _Turn):
+    def _maybe_emit_input_started(self, turn: _Turn) -> None:
         if not turn.ev_input_started:
             turn.ev_input_started = True
             self._emit("input_speech_started", llm.InputSpeechStartedEvent())
             turn.phase = _Phase.USER_SPEAKING
 
-    def _maybe_emit_input_stopped(self, turn: _Turn):
+    def _maybe_emit_input_stopped(self, turn: _Turn) -> None:
         if not turn.ev_input_stopped:
             turn.ev_input_stopped = True
             self._emit(
@@ -111,7 +109,7 @@ class _TurnTracker:
             )
             turn.phase = _Phase.USER_FINISHED
 
-    def _emit_transcript_updated(self, turn: _Turn):
+    def _emit_transcript_updated(self, turn: _Turn) -> None:
         self._emit(
             "input_audio_transcription_completed",
             llm.InputTranscriptionCompleted(
@@ -121,7 +119,7 @@ class _TurnTracker:
             ),
         )
 
-    def _maybe_emit_transcript_completed(self, turn: _Turn):
+    def _maybe_emit_transcript_completed(self, turn: _Turn) -> None:
         if not turn.ev_trans_completed:
             turn.ev_trans_completed = True
             self._emit(
@@ -134,17 +132,10 @@ class _TurnTracker:
             ),
         )
 
-    def _maybe_emit_generation_created(self, turn: _Turn):
+    def _maybe_emit_generation_created(self, turn: _Turn) -> None:
         if not turn.ev_generation_sent:
             turn.ev_generation_sent = True
-
-            logger.debug("Emitting generation event")
-            generation_ev = llm.GenerationCreatedEvent(
-                message_stream=msg_stream,
-                function_stream=fn_stream,
-                user_initiated=False,
-            )
-            self._emit("generation_created", generation_ev)
+            self._emit_generation_fn()
             turn.phase = _Phase.ASSISTANT_RESPONDING
```
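With this change `_TurnTracker` no longer reaches into the generation's streams; it only invokes a zero-argument callback, and the session (which owns the channels) assembles and emits the `GenerationCreatedEvent`. The inversion in miniature (toy classes, not the plugin's real ones):

```python
from typing import Any, Callable


class TurnTracker:
    def __init__(
        self,
        emit_fn: Callable[[str, Any], None],
        emit_generation_fn: Callable[[], None],
    ) -> None:
        self._emit = emit_fn
        self._emit_generation_fn = emit_generation_fn

    def on_assistant_turn(self) -> None:
        # the tracker only decides *when* a generation starts...
        self._emit_generation_fn()


class Session:
    def __init__(self) -> None:
        self.tracker = TurnTracker(self.emit, self.emit_generation_event)

    def emit(self, event: str, payload: Any) -> None:
        print(f"emit {event}: {payload}")

    def emit_generation_event(self) -> None:
        # ...while the session, which owns the message/function channels,
        # builds and publishes the actual GenerationCreatedEvent
        print("emit generation_created")


Session().tracker.on_assistant_turn()
```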
```diff
--- livekit_plugins_aws-1.1.5/livekit/plugins/aws/llm.py
+++ livekit_plugins_aws-1.1.7/livekit/plugins/aws/llm.py
@@ -19,6 +19,7 @@ from dataclasses import dataclass
 from typing import Any, cast
 
 import aioboto3  # type: ignore
+from botocore.config import Config
 
 from livekit.agents import APIConnectionError, APIStatusError, llm
 from livekit.agents.llm import (
@@ -205,7 +206,8 @@ class LLMStream(llm.LLMStream):
     async def _run(self) -> None:
         retryable = True
         try:
-            async with self._session.client("bedrock-runtime") as client:
+            config = Config(user_agent_extra="x-client-framework:livekit-plugins-aws")
+            async with self._session.client("bedrock-runtime", config=config) as client:
                 response = await client.converse_stream(**self._opts)
                 request_id = response["ResponseMetadata"]["RequestId"]
                 if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
```
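Both the realtime client and the `converse_stream` path now tag outgoing requests with a framework identifier. On the boto3/aioboto3 side this uses botocore's standard `Config(user_agent_extra=...)`, which appends the token to the User-Agent header of every request the client makes (the region below is an arbitrary choice for the sake of a runnable example):

```python
import boto3
from botocore.config import Config

# appends "x-client-framework:livekit-plugins-aws" to the User-Agent header,
# letting AWS attribute traffic to the plugin
config = Config(user_agent_extra="x-client-framework:livekit-plugins-aws")
client = boto3.client("bedrock-runtime", region_name="us-east-1", config=config)
print(client.meta.config.user_agent_extra)
```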