PyPI - livekit-plugins-aws - Versions diffs - 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl - Mend

livekit-plugins-aws 1.1.3py3-none-any.whl → 1.1.5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-aws might be problematic. Click here for more details.

Files changed (13) hide show

livekit/plugins/aws/__init__.py +32 -8
livekit/plugins/aws/experimental/realtime/__init__.py +15 -0
livekit/plugins/aws/experimental/realtime/events.py +521 -0
livekit/plugins/aws/experimental/realtime/pretty_printer.py +49 -0
livekit/plugins/aws/experimental/realtime/realtime_model.py +1208 -0
livekit/plugins/aws/experimental/realtime/turn_tracker.py +172 -0
livekit/plugins/aws/log.py +4 -0
livekit/plugins/aws/tts.py +0 -2
livekit/plugins/aws/version.py +1 -1
{livekit_plugins_aws-1.1.3.dist-info → livekit_plugins_aws-1.1.5.dist-info}/METADATA +11 -5
livekit_plugins_aws-1.1.5.dist-info/RECORD +17 -0
livekit_plugins_aws-1.1.3.dist-info/RECORD +0 -12
{livekit_plugins_aws-1.1.3.dist-info → livekit_plugins_aws-1.1.5.dist-info}/WHEEL +0 -0

livekit/plugins/aws/experimental/realtime/turn_tracker.py ADDED Viewed

@@ -0,0 +1,172 @@
+from __future__ import annotations
+import datetime
+import enum
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Callable
+from livekit.agents import llm, utils
+from ...log import logger
+class _Phase(enum.Enum):
+    """Stage a single turn is in, from idle through to completion."""
+    # waiting for the USER to begin speaking
+    IDLE = 0
+    # still receiving USER text+audio blocks
+    USER_SPEAKING = 1
+    # first ASSISTANT speculative block observed
+    USER_FINISHED = 2
+    # ASSISTANT audio/text streaming
+    ASSISTANT_RESPONDING = 3
+    # assistant audio ended (END_TURN) or barge-in (INTERRUPTED)
+    DONE = 4
+# note: because user ASR text is transcribed server-side, a single turn comprises
+# both the user's and the agent's speech
+@dataclass
+class _Turn:
+    """Mutable state of one turn: identifiers, transcript pieces, phase and emit flags."""
+    turn_id: int
+    input_id: str = field(default_factory=lambda: str(uuid.uuid4()))
+    # timezone-aware UTC timestamp; datetime.utcnow() is deprecated since Python 3.12
+    # and returns a naive value that is easy to misread as local time
+    created: datetime.datetime = field(
+        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc)
+    )
+    # partial user transcript pieces, in arrival order
+    transcript: list[str] = field(default_factory=list)
+    phase: _Phase = _Phase.IDLE
+    # one-shot flags: each flips to True the first time its event is emitted for this turn
+    ev_input_started: bool = False
+    ev_input_stopped: bool = False
+    ev_trans_completed: bool = False
+    ev_generation_sent: bool = False
+    def add_partial_text(self, text: str) -> None:
+        """Append one partial transcript piece to this turn."""
+        self.transcript.append(text)
+    @property
+    def curr_transcript(self) -> str:
+        """Return all pieces received so far, joined with single spaces."""
+        return " ".join(self.transcript)
+class _TurnTracker:
+    """Turn raw output events into turn-level events delivered through ``emit_fn``.
+    Each event passed to :meth:`feed` is labelled by ``_classify`` and applied to the
+    current ``_Turn``. A new turn is created lazily on the first event that arrives
+    after the previous turn reached ``_Phase.DONE``.
+    """
+    def __init__(
+        self,
+        emit_fn: Callable[[str, Any], None],
+        streams_provider: Callable[
+            [], tuple[utils.aio.Chan[llm.MessageGeneration], utils.aio.Chan[llm.FunctionCall]]
+        ],
+    ) -> None:
+        """Store the callbacks and start with no active turn.
+        Args:
+            emit_fn: called as ``emit_fn(event_name, payload)`` for every emitted event.
+            streams_provider: zero-argument callable returning the
+                ``(message_stream, function_stream)`` pair attached to the
+                ``generation_created`` event.
+        """
+        self._emit = emit_fn
+        self._turn_idx = 0
+        self._curr_turn: _Turn | None = None
+        self._get_streams = streams_provider
+    # --------------------------------------------------------
+    #  PUBLIC ENTRY POINT
+    # --------------------------------------------------------
+    def feed(self, event: dict) -> None:
+        """Apply one raw event to the current turn and emit any resulting events.
+        Events that ``_classify`` does not recognise still create a turn if none is
+        active, but otherwise change nothing.
+        """
+        turn = self._ensure_turn()
+        kind = _classify(event)
+        if kind == "USER_TEXT_PARTIAL":
+            turn.add_partial_text(event["event"]["textOutput"]["content"])
+            self._maybe_emit_input_started(turn)
+            self._emit_transcript_updated(turn)
+            # note: cannot invoke self._maybe_emit_input_stopped() here
+            # b/c there is no way to know if the user is done speaking
+        # will always be correlated b/c generate_reply() is a stub
+        # user ASR text ends when agent's ASR speculative text begins
+        # corresponds to beginning of agent's turn
+        elif kind in ("TOOL_OUTPUT_CONTENT_START", "ASSISTANT_SPEC_START"):
+            # these must be "maybe" methods b/c agent can chain multiple tool calls
+            self._maybe_emit_input_stopped(turn)
+            self._maybe_emit_transcript_completed(turn)
+            self._maybe_emit_generation_created(turn)
+        elif kind == "BARGE_IN":
+            # lazy %-formatting: the turn repr is only built when DEBUG is enabled
+            logger.debug("BARGE-IN DETECTED IN TURN TRACKER: %s", turn)
+            # start new turn immediately to make interruptions snappier
+            self._emit("input_speech_started", llm.InputSpeechStartedEvent())
+            turn.phase = _Phase.DONE
+        elif kind == "ASSISTANT_AUDIO_END":
+            # a missing stopReason is treated like any non-END_TURN reason
+            # instead of raising KeyError
+            if event["event"]["contentEnd"].get("stopReason") == "END_TURN":
+                turn.phase = _Phase.DONE
+        if turn.phase is _Phase.DONE:
+            # drop the finished turn; the next feed() call creates a fresh one
+            self._curr_turn = None
+    def _ensure_turn(self) -> _Turn:
+        """Return the active turn, creating one with the next ``turn_id`` if needed."""
+        if self._curr_turn is None:
+            self._turn_idx += 1
+            self._curr_turn = _Turn(turn_id=self._turn_idx)
+        return self._curr_turn
+    def _maybe_emit_input_started(self, turn: _Turn) -> None:
+        """Emit ``input_speech_started`` once per turn and move to ``USER_SPEAKING``."""
+        if not turn.ev_input_started:
+            turn.ev_input_started = True
+            self._emit("input_speech_started", llm.InputSpeechStartedEvent())
+            turn.phase = _Phase.USER_SPEAKING
+    def _maybe_emit_input_stopped(self, turn: _Turn) -> None:
+        """Emit ``input_speech_stopped`` once per turn and move to ``USER_FINISHED``."""
+        if not turn.ev_input_stopped:
+            turn.ev_input_stopped = True
+            self._emit(
+                "input_speech_stopped", llm.InputSpeechStoppedEvent(user_transcription_enabled=True)
+            )
+            turn.phase = _Phase.USER_FINISHED
+    def _emit_transcript_updated(self, turn: _Turn) -> None:
+        """Emit the transcript accumulated so far as a non-final transcription event."""
+        self._emit(
+            "input_audio_transcription_completed",
+            llm.InputTranscriptionCompleted(
+                item_id=turn.input_id,
+                transcript=turn.curr_transcript,
+                is_final=False,
+            ),
+        )
+    def _maybe_emit_transcript_completed(self, turn: _Turn) -> None:
+        """Emit the final transcription event for the turn, at most once."""
+        if not turn.ev_trans_completed:
+            turn.ev_trans_completed = True
+            self._emit(
+                "input_audio_transcription_completed",
+                # Q: does input_id need to match /w the _ResponseGeneration.input_id?
+                llm.InputTranscriptionCompleted(
+                    item_id=turn.input_id,
+                    transcript=turn.curr_transcript,
+                    is_final=True,
+                ),
+            )
+    def _maybe_emit_generation_created(self, turn: _Turn) -> None:
+        """Emit ``generation_created`` once per turn and move to ``ASSISTANT_RESPONDING``."""
+        if not turn.ev_generation_sent:
+            turn.ev_generation_sent = True
+            # streams are fetched only when the event is actually emitted
+            msg_stream, fn_stream = self._get_streams()
+            logger.debug("Emitting generation event")
+            generation_ev = llm.GenerationCreatedEvent(
+                message_stream=msg_stream,
+                function_stream=fn_stream,
+                user_initiated=False,
+            )
+            self._emit("generation_created", generation_ev)
+            turn.phase = _Phase.ASSISTANT_RESPONDING
+def _classify(ev: dict) -> str:
+    """Map a raw output event to the label the turn tracker dispatches on.
+    Checks run in a fixed order and the first match wins. Missing or null fields
+    are treated as "no match" rather than raising.
+    Args:
+        ev: event dict; the payload is read from its ``"event"`` key.
+    Returns:
+        One of ``"USER_TEXT_PARTIAL"``, ``"TOOL_OUTPUT_CONTENT_START"``,
+        ``"ASSISTANT_SPEC_START"``, ``"BARGE_IN"``, ``"ASSISTANT_AUDIO_END"``,
+        or ``""`` when the event is not relevant to turn tracking.
+    """
+    e = ev.get("event") or {}
+    text_output = e.get("textOutput") or {}
+    content_start = e.get("contentStart") or {}
+    content_end = e.get("contentEnd") or {}
+    if text_output.get("role") == "USER":
+        return "USER_TEXT_PARTIAL"
+    if content_start.get("type") == "TOOL":
+        return "TOOL_OUTPUT_CONTENT_START"
+    if content_start.get("role") == "ASSISTANT":
+        # substring test on the raw field value; a non-speculative ASSISTANT
+        # start deliberately falls through to the checks below
+        add = content_start.get("additionalModelFields") or ""
+        if "SPECULATIVE" in add:
+            return "ASSISTANT_SPEC_START"
+    # barge-in is recognised by this exact text payload
+    if text_output.get("content") == '{ "interrupted" : true }':
+        return "BARGE_IN"
+    # note: there cannot be any audio events for the user in the output event loop
+    # therefore, we know that the audio event must be for the assistant
+    if content_end.get("type") == "AUDIO":
+        return "ASSISTANT_AUDIO_END"
+    return ""

livekit/plugins/aws/log.py CHANGED Viewed

@@ -1,3 +1,7 @@
 import logging
 logger = logging.getLogger("livekit.plugins.aws")
+# NOTE(review): the two SDK loggers below are set to INFO when this module is imported,
+# presumably to keep their DEBUG output out of application logs; confirm the override is intended
+smithy_logger = logging.getLogger("smithy_aws_event_stream.aio")
+smithy_logger.setLevel(logging.INFO)
+bedrock_client_logger = logging.getLogger("aws_sdk_bedrock_runtime.client")
+bedrock_client_logger.setLevel(logging.INFO)

livekit/plugins/aws/tts.py CHANGED Viewed

@@ -148,8 +148,6 @@ class ChunkedStream(tts.ChunkedStream):
                     async with response["AudioStream"] as resp:
                         async for data, _ in resp.content.iter_chunks():
                             output_emitter.push(data)
-                    output_emitter.flush()
         except botocore.exceptions.ConnectTimeoutError:
             raise APITimeoutError() from None
         except Exception as e:

livekit/plugins/aws/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.1.3"
+__version__ = "1.1.5"

{livekit_plugins_aws-1.1.3.dist-info → livekit_plugins_aws-1.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,13 +1,13 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-aws
-Version: 1.1.3
+Version: 1.1.5
 Summary: LiveKit Agents Plugin for services from AWS
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
 Project-URL: Source, https://github.com/livekit/agents
 Author-email: LiveKit <hello@livekit.io>
 License-Expression: Apache-2.0
-Keywords: audio,aws,livekit,realtime,video,webrtc
+Keywords: audio,aws,livekit,nova,realtime,sonic,video,webrtc
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
@@ -20,12 +20,15 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: aioboto3>=14.1.0
 Requires-Dist: amazon-transcribe>=0.6.2
-Requires-Dist: livekit-agents>=1.1.3
+Requires-Dist: livekit-agents>=1.1.5
+Provides-Extra: realtime
+Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
+Requires-Dist: boto3>1.35.10; extra == 'realtime'
 Description-Content-Type: text/markdown
 # AWS plugin for LiveKit Agents
-Support for AWS AI including Bedrock, Polly, and Transcribe.
+Support for AWS AI including Bedrock, Polly, Transcribe and optionally Nova Sonic (realtime STS model).
 See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/agents/integrations/aws/) for more information.
@@ -33,8 +36,11 @@ See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/a
 ```bash
 pip install livekit-plugins-aws
+# for access to Nova Sonic
+pip install livekit-plugins-aws[realtime]
 ```
 ## Pre-requisites
-You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
+You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.

livekit_plugins_aws-1.1.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
+livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
+livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
+livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
+livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
+livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
+livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
+livekit/plugins/aws/version.py,sha256=OKtayGMVDYKyoKBO2yNM4kfRbH-PODJqECIiYhUzNWg,600
+livekit/plugins/aws/experimental/realtime/__init__.py,sha256=mm_TGZc9QAWSO-VOO3PdE8Y5R6xlWckXRZuiFUIHa-Q,287
+livekit/plugins/aws/experimental/realtime/events.py,sha256=ViWr4_RLY0VDGTF-dDL0b_-7GFlF08Lw5_x6q3EJ5eM,15917
+livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
+livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=1FyGB7VkUHWHsgkzNEELc0-qOz3tbDEuT2PWlqI-2GU,55978
+livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=ER1Inu9D7X4EZ_wqpKeidrx52JXfnmnQHmxOielbjvc,6363
+livekit_plugins_aws-1.1.5.dist-info/METADATA,sha256=EU-x14QER4ma3vx2b9vALcZMYYc1fv1f9fB5lX01E7Y,1827
+livekit_plugins_aws-1.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_aws-1.1.5.dist-info/RECORD,,

livekit_plugins_aws-1.1.3.dist-info/RECORD DELETED Viewed

@@ -1,12 +0,0 @@
-livekit/plugins/aws/__init__.py,sha256=fkbgTfNZc6z4VxbYGNdY73EoGvNuRcJiuD-OlUHvjHU,1322
-livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
-livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
-livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
-livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
-livekit/plugins/aws/tts.py,sha256=vYWPOw0QLIAJR-2lNVIV92o3cafNYCFdVBxE3z2L8E8,6127
-livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
-livekit/plugins/aws/version.py,sha256=CX0B8KLm54mWslofdGA4Ue0sqe3NNbkDowPcEG7tAXA,600
-livekit_plugins_aws-1.1.3.dist-info/METADATA,sha256=3_MbOYnrp3EAZ0G-yYlqBY6B1LqRUplqcTPbOKqJBuE,1529
-livekit_plugins_aws-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_aws-1.1.3.dist-info/RECORD,,

{livekit_plugins_aws-1.1.3.dist-info → livekit_plugins_aws-1.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-aws 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl

Potentially problematic release.

livekit-plugins-aws 1.1.3py3-none-any.whl → 1.1.5py3-none-any.whl