livekit-plugins-aws 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-aws might be problematic. Click here for more details.

@@ -0,0 +1,172 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import enum
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, Callable
8
+
9
+ from livekit.agents import llm, utils
10
+
11
+ from ...log import logger
12
+
13
+
14
+ class _Phase(enum.Enum):
15
+ IDLE = 0 # waiting for the USER to begin speaking
16
+ USER_SPEAKING = 1 # still receiving USER text+audio blocks
17
+ USER_FINISHED = 2 # first ASSISTANT speculative block observed
18
+ ASSISTANT_RESPONDING = 3 # ASSISTANT audio/text streaming
19
+ DONE = 4 # assistant audio ended (END_TURN) or barge-in (INTERRUPTED)
20
+
21
+
22
+ # note: b/c user ASR text is transcribed server-side, a single turn constitutes
23
+ # both the user and agent's speech
24
@dataclass
class _Turn:
    """State of one turn: accumulated user transcript plus emit-once flags.

    The ``ev_*`` booleans record which events have already been emitted for
    this turn so that each of them is sent at most once.
    """

    # sequential index assigned by the tracker
    turn_id: int
    # random UUID string; used as ``item_id`` of the transcription events
    input_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # creation time as a naive UTC datetime.  ``datetime.utcnow`` is deprecated
    # since Python 3.12, so build the same naive-UTC value from an aware "now".
    created: datetime.datetime = field(
        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    )
    # partial user transcript fragments, in arrival order
    transcript: list[str] = field(default_factory=list)

    phase: _Phase = _Phase.IDLE
    ev_input_started: bool = False
    ev_input_stopped: bool = False
    ev_trans_completed: bool = False
    ev_generation_sent: bool = False

    def add_partial_text(self, text: str) -> None:
        """Append one partial transcript fragment to this turn."""
        self.transcript.append(text)

    @property
    def curr_transcript(self) -> str:
        """Return all fragments received so far, joined by single spaces."""
        return " ".join(self.transcript)
43
+
44
+
45
class _TurnTracker:
    """Consume raw output events and emit per-turn session events.

    At most one ``_Turn`` is active at a time.  Every call to ``feed``
    classifies the incoming event with ``_classify`` and, depending on the
    result, emits ``input_speech_started``, ``input_speech_stopped``,
    ``input_audio_transcription_completed`` and ``generation_created`` through
    the ``emit_fn`` callback.  When the turn reaches ``_Phase.DONE`` it is
    dropped and the next event starts a fresh turn.
    """

    def __init__(
        self,
        emit_fn: Callable[[str, Any], None],
        streams_provider: Callable[
            [], tuple[utils.aio.Chan[llm.MessageGeneration], utils.aio.Chan[llm.FunctionCall]]
        ],
    ) -> None:
        """Create a tracker.

        Args:
            emit_fn: Called as ``emit_fn(event_name, payload)`` for every
                event this tracker produces.
            streams_provider: Zero-argument callable returning the
                ``(message_stream, function_stream)`` pair that is attached to
                the ``generation_created`` event.
        """
        self._emit = emit_fn
        self._turn_idx = 0
        self._curr_turn: _Turn | None = None
        self._get_streams = streams_provider

    # --------------------------------------------------------
    #  PUBLIC ENTRY POINT
    # --------------------------------------------------------
    def feed(self, event: dict) -> None:
        """Advance the current turn with one raw event.

        A turn is created on demand, so every event (even one that
        ``_classify`` does not recognise) guarantees an active turn.

        Args:
            event: Raw event dict; only the keys inspected by ``_classify``
                and the branches below are read.
        """
        turn = self._ensure_turn()
        kind = _classify(event)

        if kind == "USER_TEXT_PARTIAL":
            turn.add_partial_text(event["event"]["textOutput"]["content"])
            self._maybe_emit_input_started(turn)
            self._emit_transcript_updated(turn)
            # note: cannot invoke self._maybe_emit_input_stopped() here
            # b/c there is no way to know if the user is done speaking

        # will always be correlated b/c generate_reply() is a stub
        # user ASR text ends when agent's ASR speculative text begins
        # corresponds to beginning of agent's turn
        elif kind in ("TOOL_OUTPUT_CONTENT_START", "ASSISTANT_SPEC_START"):
            # these must be "maybe" methods b/c the agent can chain multiple tool calls
            self._maybe_emit_input_stopped(turn)
            self._maybe_emit_transcript_completed(turn)
            self._maybe_emit_generation_created(turn)

        elif kind == "BARGE_IN":
            # lazy %-formatting: the turn is only rendered if DEBUG is enabled
            logger.debug("BARGE-IN DETECTED IN TURN TRACKER: %s", turn)
            # start new turn immediately to make interruptions snappier;
            # emitted unconditionally, i.e. not guarded by ev_input_started
            self._emit("input_speech_started", llm.InputSpeechStartedEvent())
            turn.phase = _Phase.DONE

        elif kind == "ASSISTANT_AUDIO_END":
            # "contentEnd" is guaranteed by _classify; "stopReason" is looked up
            # tolerantly so an event without it is ignored instead of raising
            if event["event"]["contentEnd"].get("stopReason") == "END_TURN":
                turn.phase = _Phase.DONE

        if turn.phase is _Phase.DONE:
            # drop the finished turn; the next event creates a new one
            self._curr_turn = None

    def _ensure_turn(self) -> _Turn:
        """Return the active turn, creating one with the next index if needed."""
        if self._curr_turn is None:
            self._turn_idx += 1
            self._curr_turn = _Turn(turn_id=self._turn_idx)
        return self._curr_turn

    def _maybe_emit_input_started(self, turn: _Turn) -> None:
        """Emit ``input_speech_started`` once per turn and mark the user as speaking."""
        if not turn.ev_input_started:
            turn.ev_input_started = True
            self._emit("input_speech_started", llm.InputSpeechStartedEvent())
            turn.phase = _Phase.USER_SPEAKING

    def _maybe_emit_input_stopped(self, turn: _Turn) -> None:
        """Emit ``input_speech_stopped`` once per turn and mark the user as finished."""
        if not turn.ev_input_stopped:
            turn.ev_input_stopped = True
            self._emit(
                "input_speech_stopped", llm.InputSpeechStoppedEvent(user_transcription_enabled=True)
            )
            turn.phase = _Phase.USER_FINISHED

    def _emit_transcript_updated(self, turn: _Turn) -> None:
        """Emit the transcript accumulated so far as a non-final transcription event."""
        self._emit(
            "input_audio_transcription_completed",
            llm.InputTranscriptionCompleted(
                item_id=turn.input_id,
                transcript=turn.curr_transcript,
                is_final=False,
            ),
        )

    def _maybe_emit_transcript_completed(self, turn: _Turn) -> None:
        """Emit the final transcription event once per turn."""
        if not turn.ev_trans_completed:
            turn.ev_trans_completed = True
            self._emit(
                "input_audio_transcription_completed",
                # Q: does input_id need to match /w the _ResponseGeneration.input_id?
                llm.InputTranscriptionCompleted(
                    item_id=turn.input_id,
                    transcript=turn.curr_transcript,
                    is_final=True,
                ),
            )

    def _maybe_emit_generation_created(self, turn: _Turn) -> None:
        """Emit ``generation_created`` once per turn and mark the assistant as responding."""
        if not turn.ev_generation_sent:
            turn.ev_generation_sent = True
            msg_stream, fn_stream = self._get_streams()
            logger.debug("Emitting generation event")
            generation_ev = llm.GenerationCreatedEvent(
                message_stream=msg_stream,
                function_stream=fn_stream,
                user_initiated=False,
            )
            self._emit("generation_created", generation_ev)
            turn.phase = _Phase.ASSISTANT_RESPONDING
149
+
150
+
151
+ def _classify(ev: dict) -> str:
152
+ e = ev.get("event", {})
153
+ if "textOutput" in e and e["textOutput"]["role"] == "USER":
154
+ return "USER_TEXT_PARTIAL"
155
+
156
+ if "contentStart" in e and e["contentStart"]["type"] == "TOOL":
157
+ return "TOOL_OUTPUT_CONTENT_START"
158
+
159
+ if "contentStart" in e and e["contentStart"]["role"] == "ASSISTANT":
160
+ add = e["contentStart"].get("additionalModelFields", "")
161
+ if "SPECULATIVE" in add:
162
+ return "ASSISTANT_SPEC_START"
163
+
164
+ if "textOutput" in e and e["textOutput"]["content"] == '{ "interrupted" : true }':
165
+ return "BARGE_IN"
166
+
167
+ # note: there cannot be any audio events for the user in the output event loop
168
+ # therefore, we know that the audio event must be for the assistant
169
+ if "contentEnd" in e and e["contentEnd"]["type"] == "AUDIO":
170
+ return "ASSISTANT_AUDIO_END"
171
+
172
+ return ""
@@ -1,3 +1,7 @@
1
1
  import logging
2
2
 
3
3
# Logger used by the plugin itself.
logger = logging.getLogger("livekit.plugins.aws")

# Loggers owned by the AWS SDK packages; both are set to INFO
# (presumably to keep their DEBUG output out of the plugin's logs).
smithy_logger = logging.getLogger("smithy_aws_event_stream.aio")
bedrock_client_logger = logging.getLogger("aws_sdk_bedrock_runtime.client")
smithy_logger.setLevel(logging.INFO)
bedrock_client_logger.setLevel(logging.INFO)
@@ -148,8 +148,6 @@ class ChunkedStream(tts.ChunkedStream):
148
148
  async with response["AudioStream"] as resp:
149
149
  async for data, _ in resp.content.iter_chunks():
150
150
  output_emitter.push(data)
151
-
152
- output_emitter.flush()
153
151
  except botocore.exceptions.ConnectTimeoutError:
154
152
  raise APITimeoutError() from None
155
153
  except Exception as e:
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.1.3"
15
+ __version__ = "1.1.5"
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-aws
3
- Version: 1.1.3
3
+ Version: 1.1.5
4
4
  Summary: LiveKit Agents Plugin for services from AWS
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
7
7
  Project-URL: Source, https://github.com/livekit/agents
8
8
  Author-email: LiveKit <hello@livekit.io>
9
9
  License-Expression: Apache-2.0
10
- Keywords: audio,aws,livekit,realtime,video,webrtc
10
+ Keywords: audio,aws,livekit,nova,realtime,sonic,video,webrtc
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Programming Language :: Python :: 3
@@ -20,12 +20,15 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
21
  Requires-Dist: aioboto3>=14.1.0
22
22
  Requires-Dist: amazon-transcribe>=0.6.2
23
- Requires-Dist: livekit-agents>=1.1.3
23
+ Requires-Dist: livekit-agents>=1.1.5
24
+ Provides-Extra: realtime
25
+ Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
26
+ Requires-Dist: boto3>1.35.10; extra == 'realtime'
24
27
  Description-Content-Type: text/markdown
25
28
 
26
29
  # AWS plugin for LiveKit Agents
27
30
 
28
- Support for AWS AI including Bedrock, Polly, and Transcribe.
31
+ Support for AWS AI services including Bedrock, Polly, Transcribe, and optionally Nova Sonic (a realtime speech-to-speech model).
29
32
 
30
33
  See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/agents/integrations/aws/) for more information.
31
34
 
@@ -33,8 +36,11 @@ See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/a
33
36
 
34
37
  ```bash
35
38
  pip install livekit-plugins-aws
39
+
40
+ # for access to Nova Sonic
41
+ pip install livekit-plugins-aws[realtime]
36
42
  ```
37
43
 
38
44
  ## Pre-requisites
39
45
 
40
- You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
46
+ You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
@@ -0,0 +1,17 @@
1
+ livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
2
+ livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
3
+ livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
4
+ livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
5
+ livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
7
+ livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
8
+ livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
9
+ livekit/plugins/aws/version.py,sha256=OKtayGMVDYKyoKBO2yNM4kfRbH-PODJqECIiYhUzNWg,600
10
+ livekit/plugins/aws/experimental/realtime/__init__.py,sha256=mm_TGZc9QAWSO-VOO3PdE8Y5R6xlWckXRZuiFUIHa-Q,287
11
+ livekit/plugins/aws/experimental/realtime/events.py,sha256=ViWr4_RLY0VDGTF-dDL0b_-7GFlF08Lw5_x6q3EJ5eM,15917
12
+ livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
13
+ livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=1FyGB7VkUHWHsgkzNEELc0-qOz3tbDEuT2PWlqI-2GU,55978
14
+ livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=ER1Inu9D7X4EZ_wqpKeidrx52JXfnmnQHmxOielbjvc,6363
15
+ livekit_plugins_aws-1.1.5.dist-info/METADATA,sha256=EU-x14QER4ma3vx2b9vALcZMYYc1fv1f9fB5lX01E7Y,1827
16
+ livekit_plugins_aws-1.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ livekit_plugins_aws-1.1.5.dist-info/RECORD,,
@@ -1,12 +0,0 @@
1
- livekit/plugins/aws/__init__.py,sha256=fkbgTfNZc6z4VxbYGNdY73EoGvNuRcJiuD-OlUHvjHU,1322
2
- livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
3
- livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
4
- livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
5
- livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
7
- livekit/plugins/aws/tts.py,sha256=vYWPOw0QLIAJR-2lNVIV92o3cafNYCFdVBxE3z2L8E8,6127
8
- livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
9
- livekit/plugins/aws/version.py,sha256=CX0B8KLm54mWslofdGA4Ue0sqe3NNbkDowPcEG7tAXA,600
10
- livekit_plugins_aws-1.1.3.dist-info/METADATA,sha256=3_MbOYnrp3EAZ0G-yYlqBY6B1LqRUplqcTPbOKqJBuE,1529
11
- livekit_plugins_aws-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
12
- livekit_plugins_aws-1.1.3.dist-info/RECORD,,