livekit-plugins-aws 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-aws might be problematic. Click here for more details.
- livekit/plugins/aws/__init__.py +32 -8
- livekit/plugins/aws/experimental/realtime/__init__.py +15 -0
- livekit/plugins/aws/experimental/realtime/events.py +521 -0
- livekit/plugins/aws/experimental/realtime/pretty_printer.py +49 -0
- livekit/plugins/aws/experimental/realtime/realtime_model.py +1208 -0
- livekit/plugins/aws/experimental/realtime/turn_tracker.py +172 -0
- livekit/plugins/aws/log.py +4 -0
- livekit/plugins/aws/tts.py +0 -2
- livekit/plugins/aws/version.py +1 -1
- {livekit_plugins_aws-1.1.3.dist-info → livekit_plugins_aws-1.1.5.dist-info}/METADATA +11 -5
- livekit_plugins_aws-1.1.5.dist-info/RECORD +17 -0
- livekit_plugins_aws-1.1.3.dist-info/RECORD +0 -12
- {livekit_plugins_aws-1.1.3.dist-info → livekit_plugins_aws-1.1.5.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
import enum
|
|
5
|
+
import uuid
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
from livekit.agents import llm, utils
|
|
10
|
+
|
|
11
|
+
from ...log import logger
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class _Phase(enum.Enum):
|
|
15
|
+
IDLE = 0 # waiting for the USER to begin speaking
|
|
16
|
+
USER_SPEAKING = 1 # still receiving USER text+audio blocks
|
|
17
|
+
USER_FINISHED = 2 # first ASSISTANT speculative block observed
|
|
18
|
+
ASSISTANT_RESPONDING = 3 # ASSISTANT audio/text streaming
|
|
19
|
+
DONE = 4 # assistant audio ended (END_TURN) or barge-in (INTERRUPTED)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# note: b/c user ASR text is transcribed server-side, a single turn constitutes
|
|
23
|
+
# both the user and agent's speech
|
|
24
|
+
@dataclass
class _Turn:
    """Mutable state accumulated for one conversational turn.

    User ASR text is transcribed server-side, so a single turn covers both the
    user's speech and the agent's response to it.
    """

    # sequential index of the turn, supplied by the creator
    turn_id: int
    # random UUID string used as the item id of the user's input
    input_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # `datetime.utcnow()` is deprecated since Python 3.12; this builds the same
    # naive-UTC timestamp so existing consumers of `created` see no change
    created: datetime.datetime = field(
        default_factory=lambda: datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    )
    # partial user transcripts, in arrival order
    transcript: list[str] = field(default_factory=list)

    phase: _Phase = _Phase.IDLE
    # one-shot guards: each flips to True when its event has been emitted for this turn
    ev_input_started: bool = False
    ev_input_stopped: bool = False
    ev_trans_completed: bool = False
    ev_generation_sent: bool = False

    def add_partial_text(self, text: str) -> None:
        """Append one partial transcript fragment to this turn."""
        self.transcript.append(text)

    @property
    def curr_transcript(self) -> str:
        """Return all fragments received so far, joined by single spaces."""
        return " ".join(self.transcript)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class _TurnTracker:
    """Translate raw output events into turn-level events.

    Each event passed to `feed()` is classified with `_classify()` and, depending
    on its kind, one or more events are emitted through `emit_fn`. The per-turn
    `ev_*` flags on `_Turn` ensure the "maybe" emitters fire at most once per turn.

    Args:
        emit_fn: callback invoked as ``emit_fn(event_name, payload)``.
        streams_provider: zero-argument callable returning the
            ``(message_stream, function_stream)`` pair attached to the
            ``generation_created`` event.
    """

    def __init__(
        self,
        emit_fn: Callable[[str, Any], None],
        streams_provider: Callable[
            [], tuple[utils.aio.Chan[llm.MessageGeneration], utils.aio.Chan[llm.FunctionCall]]
        ],
    ) -> None:
        self._emit = emit_fn
        self._turn_idx = 0
        self._curr_turn: _Turn | None = None
        self._get_streams = streams_provider

    # --------------------------------------------------------
    #  PUBLIC ENTRY POINT
    # --------------------------------------------------------
    def feed(self, event: dict) -> None:
        """Consume one raw event and emit whatever turn-level events it implies."""
        turn = self._ensure_turn()
        kind = _classify(event)

        if kind == "USER_TEXT_PARTIAL":
            turn.add_partial_text(event["event"]["textOutput"]["content"])
            self._maybe_emit_input_started(turn)
            self._emit_transcript_updated(turn)
            # note: cannot invoke self._maybe_input_stopped() here
            # b/c there is no way to know if the user is done speaking

        # will always be correlated b/c generate_reply() is a stub
        # user ASR text ends when agent's ASR speculative text begins
        # corresponds to beginning of agent's turn
        elif kind in ("TOOL_OUTPUT_CONTENT_START", "ASSISTANT_SPEC_START"):
            # must be "maybe" methods b/c agent can chain multiple tool calls
            self._maybe_emit_input_stopped(turn)
            self._maybe_emit_transcript_completed(turn)
            self._maybe_emit_generation_created(turn)

        elif kind == "BARGE_IN":
            # lazy %-formatting: the turn repr is only built when DEBUG is enabled
            logger.debug("BARGE-IN DETECTED IN TURN TRACKER: %s", turn)
            # start new turn immediately to make interruptions snappier
            # NOTE(review): the next turn's first user text emits
            # "input_speech_started" again via _maybe_emit_input_started —
            # confirm the duplicate is intended.
            self._emit("input_speech_started", llm.InputSpeechStartedEvent())
            turn.phase = _Phase.DONE

        elif kind == "ASSISTANT_AUDIO_END":
            # .get(): a contentEnd without stopReason is treated as "not END_TURN"
            # instead of raising KeyError
            if event["event"]["contentEnd"].get("stopReason") == "END_TURN":
                turn.phase = _Phase.DONE

        # a finished turn is dropped so the next event starts a fresh one
        if turn.phase is _Phase.DONE:
            self._curr_turn = None

    def _ensure_turn(self) -> _Turn:
        """Return the current turn, creating the next numbered one if none is active."""
        if self._curr_turn is None:
            self._turn_idx += 1
            self._curr_turn = _Turn(turn_id=self._turn_idx)
        return self._curr_turn

    def _maybe_emit_input_started(self, turn: _Turn) -> None:
        """Emit "input_speech_started" once per turn and mark the user as speaking."""
        if not turn.ev_input_started:
            turn.ev_input_started = True
            self._emit("input_speech_started", llm.InputSpeechStartedEvent())
            turn.phase = _Phase.USER_SPEAKING

    def _maybe_emit_input_stopped(self, turn: _Turn) -> None:
        """Emit "input_speech_stopped" once per turn and mark the user as finished."""
        if not turn.ev_input_stopped:
            turn.ev_input_stopped = True
            self._emit(
                "input_speech_stopped", llm.InputSpeechStoppedEvent(user_transcription_enabled=True)
            )
            turn.phase = _Phase.USER_FINISHED

    def _emit_transcript_updated(self, turn: _Turn) -> None:
        """Emit the transcript accumulated so far as a non-final transcription event."""
        self._emit(
            "input_audio_transcription_completed",
            llm.InputTranscriptionCompleted(
                item_id=turn.input_id,
                transcript=turn.curr_transcript,
                is_final=False,
            ),
        )

    def _maybe_emit_transcript_completed(self, turn: _Turn) -> None:
        """Emit the final transcription event for this turn, at most once."""
        if not turn.ev_trans_completed:
            turn.ev_trans_completed = True
            self._emit(
                "input_audio_transcription_completed",
                # Q: does input_id need to match /w the _ResponseGeneration.input_id?
                llm.InputTranscriptionCompleted(
                    item_id=turn.input_id,
                    transcript=turn.curr_transcript,
                    is_final=True,
                ),
            )

    def _maybe_emit_generation_created(self, turn: _Turn) -> None:
        """Emit "generation_created" once per turn with the streams from the provider."""
        if not turn.ev_generation_sent:
            turn.ev_generation_sent = True
            msg_stream, fn_stream = self._get_streams()
            logger.debug("Emitting generation event")
            generation_ev = llm.GenerationCreatedEvent(
                message_stream=msg_stream,
                function_stream=fn_stream,
                user_initiated=False,
            )
            self._emit("generation_created", generation_ev)
            turn.phase = _Phase.ASSISTANT_RESPONDING
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _classify(ev: dict) -> str:
|
|
152
|
+
e = ev.get("event", {})
|
|
153
|
+
if "textOutput" in e and e["textOutput"]["role"] == "USER":
|
|
154
|
+
return "USER_TEXT_PARTIAL"
|
|
155
|
+
|
|
156
|
+
if "contentStart" in e and e["contentStart"]["type"] == "TOOL":
|
|
157
|
+
return "TOOL_OUTPUT_CONTENT_START"
|
|
158
|
+
|
|
159
|
+
if "contentStart" in e and e["contentStart"]["role"] == "ASSISTANT":
|
|
160
|
+
add = e["contentStart"].get("additionalModelFields", "")
|
|
161
|
+
if "SPECULATIVE" in add:
|
|
162
|
+
return "ASSISTANT_SPEC_START"
|
|
163
|
+
|
|
164
|
+
if "textOutput" in e and e["textOutput"]["content"] == '{ "interrupted" : true }':
|
|
165
|
+
return "BARGE_IN"
|
|
166
|
+
|
|
167
|
+
# note: there cannot be any audio events for the user in the output event loop
|
|
168
|
+
# therefore, we know that the audio event must be for the assistant
|
|
169
|
+
if "contentEnd" in e and e["contentEnd"]["type"] == "AUDIO":
|
|
170
|
+
return "ASSISTANT_AUDIO_END"
|
|
171
|
+
|
|
172
|
+
return ""
|
livekit/plugins/aws/log.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
3
|
logger = logging.getLogger("livekit.plugins.aws")
|
|
4
|
+
smithy_logger = logging.getLogger("smithy_aws_event_stream.aio")
|
|
5
|
+
smithy_logger.setLevel(logging.INFO)
|
|
6
|
+
bedrock_client_logger = logging.getLogger("aws_sdk_bedrock_runtime.client")
|
|
7
|
+
bedrock_client_logger.setLevel(logging.INFO)
|
livekit/plugins/aws/tts.py
CHANGED
|
@@ -148,8 +148,6 @@ class ChunkedStream(tts.ChunkedStream):
|
|
|
148
148
|
async with response["AudioStream"] as resp:
|
|
149
149
|
async for data, _ in resp.content.iter_chunks():
|
|
150
150
|
output_emitter.push(data)
|
|
151
|
-
|
|
152
|
-
output_emitter.flush()
|
|
153
151
|
except botocore.exceptions.ConnectTimeoutError:
|
|
154
152
|
raise APITimeoutError() from None
|
|
155
153
|
except Exception as e:
|
livekit/plugins/aws/version.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: livekit-plugins-aws
|
|
3
|
-
Version: 1.1.3
|
|
3
|
+
Version: 1.1.5
|
|
4
4
|
Summary: LiveKit Agents Plugin for services from AWS
|
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
|
6
6
|
Project-URL: Website, https://livekit.io/
|
|
7
7
|
Project-URL: Source, https://github.com/livekit/agents
|
|
8
8
|
Author-email: LiveKit <hello@livekit.io>
|
|
9
9
|
License-Expression: Apache-2.0
|
|
10
|
-
Keywords: audio,aws,livekit,realtime,video,webrtc
|
|
10
|
+
Keywords: audio,aws,livekit,nova,realtime,sonic,video,webrtc
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -20,12 +20,15 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
20
20
|
Requires-Python: >=3.9.0
|
|
21
21
|
Requires-Dist: aioboto3>=14.1.0
|
|
22
22
|
Requires-Dist: amazon-transcribe>=0.6.2
|
|
23
|
-
Requires-Dist: livekit-agents>=1.1.3
|
|
23
|
+
Requires-Dist: livekit-agents>=1.1.5
|
|
24
|
+
Provides-Extra: realtime
|
|
25
|
+
Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
|
|
26
|
+
Requires-Dist: boto3>1.35.10; extra == 'realtime'
|
|
24
27
|
Description-Content-Type: text/markdown
|
|
25
28
|
|
|
26
29
|
# AWS plugin for LiveKit Agents
|
|
27
30
|
|
|
28
|
-
Support for AWS AI including Bedrock, Polly, and Transcribe.
|
|
31
|
+
Support for AWS AI including Bedrock, Polly, Transcribe and optionally Nova Sonic (realtime STS model).
|
|
29
32
|
|
|
30
33
|
See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/agents/integrations/aws/) for more information.
|
|
31
34
|
|
|
@@ -33,8 +36,11 @@ See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/a
|
|
|
33
36
|
|
|
34
37
|
```bash
|
|
35
38
|
pip install livekit-plugins-aws
|
|
39
|
+
|
|
40
|
+
# for access to Nova Sonic
|
|
41
|
+
pip install livekit-plugins-aws[realtime]
|
|
36
42
|
```
|
|
37
43
|
|
|
38
44
|
## Pre-requisites
|
|
39
45
|
|
|
40
|
-
You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
|
|
46
|
+
You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
|
|
2
|
+
livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
|
|
3
|
+
livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
|
|
4
|
+
livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
|
|
5
|
+
livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
|
|
7
|
+
livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
|
|
8
|
+
livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
|
|
9
|
+
livekit/plugins/aws/version.py,sha256=OKtayGMVDYKyoKBO2yNM4kfRbH-PODJqECIiYhUzNWg,600
|
|
10
|
+
livekit/plugins/aws/experimental/realtime/__init__.py,sha256=mm_TGZc9QAWSO-VOO3PdE8Y5R6xlWckXRZuiFUIHa-Q,287
|
|
11
|
+
livekit/plugins/aws/experimental/realtime/events.py,sha256=ViWr4_RLY0VDGTF-dDL0b_-7GFlF08Lw5_x6q3EJ5eM,15917
|
|
12
|
+
livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
|
|
13
|
+
livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=1FyGB7VkUHWHsgkzNEELc0-qOz3tbDEuT2PWlqI-2GU,55978
|
|
14
|
+
livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=ER1Inu9D7X4EZ_wqpKeidrx52JXfnmnQHmxOielbjvc,6363
|
|
15
|
+
livekit_plugins_aws-1.1.5.dist-info/METADATA,sha256=EU-x14QER4ma3vx2b9vALcZMYYc1fv1f9fB5lX01E7Y,1827
|
|
16
|
+
livekit_plugins_aws-1.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
17
|
+
livekit_plugins_aws-1.1.5.dist-info/RECORD,,
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
livekit/plugins/aws/__init__.py,sha256=fkbgTfNZc6z4VxbYGNdY73EoGvNuRcJiuD-OlUHvjHU,1322
|
|
2
|
-
livekit/plugins/aws/llm.py,sha256=pSbO7SaqYZYJ-3JGOmyMTmTLLcjoyrZy-j5BSkFOHU8,11922
|
|
3
|
-
livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
|
|
4
|
-
livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
|
|
5
|
-
livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
livekit/plugins/aws/stt.py,sha256=PSR89aN28wm4i83yEdhkDJ9xzM0CsNIKrc3v3EbPndQ,9018
|
|
7
|
-
livekit/plugins/aws/tts.py,sha256=vYWPOw0QLIAJR-2lNVIV92o3cafNYCFdVBxE3z2L8E8,6127
|
|
8
|
-
livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
|
|
9
|
-
livekit/plugins/aws/version.py,sha256=CX0B8KLm54mWslofdGA4Ue0sqe3NNbkDowPcEG7tAXA,600
|
|
10
|
-
livekit_plugins_aws-1.1.3.dist-info/METADATA,sha256=3_MbOYnrp3EAZ0G-yYlqBY6B1LqRUplqcTPbOKqJBuE,1529
|
|
11
|
-
livekit_plugins_aws-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
-
livekit_plugins_aws-1.1.3.dist-info/RECORD,,
|
|
File without changes
|