livekit-plugins-aws 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release: the registry flags that this version of livekit-plugins-aws might be problematic.

livekit/plugins/aws/experimental/realtime/turn_tracker.py (new file)

@@ -0,0 +1,163 @@
+ from __future__ import annotations
+
+ import datetime
+ import enum
+ import uuid
+ from dataclasses import dataclass, field
+ from typing import Any, Callable
+
+ from livekit.agents import llm
+
+ from ...log import logger
+
+
+ class _Phase(enum.Enum):
+     IDLE = 0  # waiting for the USER to begin speaking
+     USER_SPEAKING = 1  # still receiving USER text+audio blocks
+     USER_FINISHED = 2  # first ASSISTANT speculative block observed
+     ASSISTANT_RESPONDING = 3  # ASSISTANT audio/text streaming
+     DONE = 4  # assistant audio ended (END_TURN) or barge-in (INTERRUPTED)
+
+
+ # note: b/c user ASR text is transcribed server-side, a single turn constitutes
+ # both the user's and the agent's speech
+ @dataclass
+ class _Turn:
+     turn_id: int
+     input_id: str = field(default_factory=lambda: str(uuid.uuid4()))
+     created: datetime.datetime = field(default_factory=datetime.datetime.utcnow)
+     transcript: list[str] = field(default_factory=list)
+
+     phase: _Phase = _Phase.IDLE
+     ev_input_started: bool = False
+     ev_input_stopped: bool = False
+     ev_trans_completed: bool = False
+     ev_generation_sent: bool = False
+
+     def add_partial_text(self, text: str) -> None:
+         self.transcript.append(text)
+
+     @property
+     def curr_transcript(self) -> str:
+         return " ".join(self.transcript)
+
+
+ class _TurnTracker:
+     def __init__(
+         self,
+         emit_fn: Callable[[str, Any], None],
+         emit_generation_fn: Callable[[], None],
+     ):
+         self._emit = emit_fn
+         self._turn_idx = 0
+         self._curr_turn: _Turn | None = None
+         self._emit_generation_fn = emit_generation_fn
+
+     # --------------------------------------------------------
+     # PUBLIC ENTRY POINT
+     # --------------------------------------------------------
+     def feed(self, event: dict) -> None:
+         turn = self._ensure_turn()
+         kind = _classify(event)
+
+         if kind == "USER_TEXT_PARTIAL":
+             turn.add_partial_text(event["event"]["textOutput"]["content"])
+             self._maybe_emit_input_started(turn)
+             self._emit_transcript_updated(turn)
+             # note: cannot invoke self._maybe_emit_input_stopped() here
+             # b/c there is no way to know if the user is done speaking
+
+         # will always be correlated b/c generate_reply() is a stub:
+         # user ASR text ends when the agent's speculative ASR text begins,
+         # which corresponds to the beginning of the agent's turn
+         elif kind == "TOOL_OUTPUT_CONTENT_START" or kind == "ASSISTANT_SPEC_START":
+             # these must be "maybe" methods b/c the agent can chain multiple tool calls
+             self._maybe_emit_input_stopped(turn)
+             self._maybe_emit_transcript_completed(turn)
+             self._maybe_emit_generation_created(turn)
+
+         elif kind == "BARGE_IN":
+             logger.debug(f"BARGE-IN DETECTED IN TURN TRACKER: {turn}")
+             # start new turn immediately to make interruptions snappier
+             self._emit("input_speech_started", llm.InputSpeechStartedEvent())
+             turn.phase = _Phase.DONE
+
+         elif kind == "ASSISTANT_AUDIO_END":
+             if event["event"]["contentEnd"]["stopReason"] == "END_TURN":
+                 turn.phase = _Phase.DONE
+
+         if turn.phase is _Phase.DONE:
+             self._curr_turn = None
+
+     def _ensure_turn(self) -> _Turn:
+         if self._curr_turn is None:
+             self._turn_idx += 1
+             self._curr_turn = _Turn(turn_id=self._turn_idx)
+         return self._curr_turn
+
+     def _maybe_emit_input_started(self, turn: _Turn) -> None:
+         if not turn.ev_input_started:
+             turn.ev_input_started = True
+             self._emit("input_speech_started", llm.InputSpeechStartedEvent())
+             turn.phase = _Phase.USER_SPEAKING
+
+     def _maybe_emit_input_stopped(self, turn: _Turn) -> None:
+         if not turn.ev_input_stopped:
+             turn.ev_input_stopped = True
+             self._emit(
+                 "input_speech_stopped", llm.InputSpeechStoppedEvent(user_transcription_enabled=True)
+             )
+             turn.phase = _Phase.USER_FINISHED
+
+     def _emit_transcript_updated(self, turn: _Turn) -> None:
+         self._emit(
+             "input_audio_transcription_completed",
+             llm.InputTranscriptionCompleted(
+                 item_id=turn.input_id,
+                 transcript=turn.curr_transcript,
+                 is_final=False,
+             ),
+         )
+
+     def _maybe_emit_transcript_completed(self, turn: _Turn) -> None:
+         if not turn.ev_trans_completed:
+             turn.ev_trans_completed = True
+             self._emit(
+                 "input_audio_transcription_completed",
+                 # Q: does input_id need to match /w the _ResponseGeneration.input_id?
+                 llm.InputTranscriptionCompleted(
+                     item_id=turn.input_id,
+                     transcript=turn.curr_transcript,
+                     is_final=True,
+                 ),
+             )
+
+     def _maybe_emit_generation_created(self, turn: _Turn) -> None:
+         if not turn.ev_generation_sent:
+             turn.ev_generation_sent = True
+             self._emit_generation_fn()
+             turn.phase = _Phase.ASSISTANT_RESPONDING
+
+
+ def _classify(ev: dict) -> str:
+     e = ev.get("event", {})
+     if "textOutput" in e and e["textOutput"]["role"] == "USER":
+         return "USER_TEXT_PARTIAL"
+
+     if "contentStart" in e and e["contentStart"]["type"] == "TOOL":
+         return "TOOL_OUTPUT_CONTENT_START"
+
+     if "contentStart" in e and e["contentStart"]["role"] == "ASSISTANT":
+         add = e["contentStart"].get("additionalModelFields", "")
+         if "SPECULATIVE" in add:
+             return "ASSISTANT_SPEC_START"
+
+     if "textOutput" in e and e["textOutput"]["content"] == '{ "interrupted" : true }':
+         return "BARGE_IN"
+
+     # note: there cannot be any audio events for the user in the output event loop;
+     # therefore, we know that the audio event must be for the assistant
+     if "contentEnd" in e and e["contentEnd"]["type"] == "AUDIO":
+         return "ASSISTANT_AUDIO_END"
+
+     return ""
livekit/plugins/aws/log.py

@@ -1,3 +1,7 @@
  import logging

  logger = logging.getLogger("livekit.plugins.aws")
+ smithy_logger = logging.getLogger("smithy_aws_event_stream.aio")
+ smithy_logger.setLevel(logging.INFO)
+ bedrock_client_logger = logging.getLogger("aws_sdk_bedrock_runtime.client")
+ bedrock_client_logger.setLevel(logging.INFO)
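The two added loggers pin the chatty AWS event-stream and Bedrock runtime client loggers to INFO at import time. An application that wants them quieter still can raise the level again after importing the plugin; a minimal sketch using only the stdlib logging API and the logger names from the hunk above (WARNING is an illustrative application-level choice, not a plugin default):

```python
import logging

# Raise the AWS event-stream / Bedrock runtime loggers above the INFO
# floor that livekit.plugins.aws.log sets at import time.
logging.getLogger("smithy_aws_event_stream.aio").setLevel(logging.WARNING)
logging.getLogger("aws_sdk_bedrock_runtime.client").setLevel(logging.WARNING)
```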
livekit/plugins/aws/version.py

@@ -12,4 +12,4 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- __version__ = "1.1.4"
+ __version__ = "1.1.6"
livekit_plugins_aws-1.1.6.dist-info/METADATA

@@ -1,13 +1,13 @@
  Metadata-Version: 2.4
  Name: livekit-plugins-aws
- Version: 1.1.4
+ Version: 1.1.6
  Summary: LiveKit Agents Plugin for services from AWS
  Project-URL: Documentation, https://docs.livekit.io
  Project-URL: Website, https://livekit.io/
  Project-URL: Source, https://github.com/livekit/agents
  Author-email: LiveKit <hello@livekit.io>
  License-Expression: Apache-2.0
- Keywords: audio,aws,livekit,realtime,video,webrtc
+ Keywords: audio,aws,livekit,nova,realtime,sonic,video,webrtc
  Classifier: Intended Audience :: Developers
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Programming Language :: Python :: 3
@@ -20,12 +20,15 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Requires-Python: >=3.9.0
  Requires-Dist: aioboto3>=14.1.0
  Requires-Dist: amazon-transcribe>=0.6.2
- Requires-Dist: livekit-agents>=1.1.4
+ Requires-Dist: livekit-agents>=1.1.6
+ Provides-Extra: realtime
+ Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
+ Requires-Dist: boto3>1.35.10; extra == 'realtime'
  Description-Content-Type: text/markdown

  # AWS plugin for LiveKit Agents

- Support for AWS AI including Bedrock, Polly, and Transcribe.
+ Support for AWS AI including Bedrock, Polly, Transcribe and optionally Nova Sonic (realtime STS model).

  See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/agents/integrations/aws/) for more information.

@@ -33,8 +36,15 @@ See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/a
  ```bash
  pip install livekit-plugins-aws
+
+ # for access to Nova Sonic
+ pip install livekit-plugins-aws[realtime]
  ```

  ## Pre-requisites

- You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
+ You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
+
+ ## Example
+
+ For an example of the realtime STS model, Nova Sonic, see: https://github.com/livekit/agents/blob/main/examples/voice_agents/realtime_joke_teller.py
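The pre-requisites in the README amount to three environment variables, which boto3 reads automatically. A minimal sketch of supplying them in-process for local experiments; the values are obviously placeholders, and in production they should come from a secret store or an IAM role instead:

```python
import os

# Placeholder credentials for illustration only.
os.environ.setdefault("AWS_ACCESS_KEY_ID", "<access-key-id>")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "<secret-access-key>")
os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")
```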
livekit_plugins_aws-1.1.6.dist-info/RECORD (new file)

@@ -0,0 +1,17 @@
+ livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
+ livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
+ livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
+ livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
+ livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
+ livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
+ livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
+ livekit/plugins/aws/version.py,sha256=-bNd31cMcYCdhZCIKJ1-jtY4NgZvppVgKyzXAIzQtqM,600
+ livekit/plugins/aws/experimental/realtime/__init__.py,sha256=mm_TGZc9QAWSO-VOO3PdE8Y5R6xlWckXRZuiFUIHa-Q,287
+ livekit/plugins/aws/experimental/realtime/events.py,sha256=-pJrwVrH5AZFxa1eDbX5nDdnJMz4BNucNZlYUYLsP-Y,15853
+ livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
+ livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=em_3Fbp1qefF7cIIHc6ib1FLdD1MOGes2Lwq61o2wlk,59464
+ livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=bcufaap-coeIYuK3ct1Is9W_UoefGYRmnJu7Mn5DCYU,6002
+ livekit_plugins_aws-1.1.6.dist-info/METADATA,sha256=ST8uYsoqQgHUVRCLC3BdkYdwALh3joYGRjblVKQgDrE,1989
+ livekit_plugins_aws-1.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ livekit_plugins_aws-1.1.6.dist-info/RECORD,,
livekit_plugins_aws-1.1.4.dist-info/RECORD (removed)

@@ -1,12 +0,0 @@
- livekit/plugins/aws/__init__.py,sha256=fkbgTfNZc6z4VxbYGNdY73EoGvNuRcJiuD-OlUHvjHU,1322
- livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
- livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
- livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
- livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
- livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
- livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
- livekit/plugins/aws/version.py,sha256=B5kiBdHIDDcwA9hDZC77YLcp8cT8rcpNKzBQ-kJeiYY,600
- livekit_plugins_aws-1.1.4.dist-info/METADATA,sha256=67ZScP6Cqs8dTwuQgvFn0_4BHT1wJjkKl-AOkDbIz5A,1529
- livekit_plugins_aws-1.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- livekit_plugins_aws-1.1.4.dist-info/RECORD,,