livekit-plugins-aws 1.0.0rc6__py3-none-any.whl → 1.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- livekit/plugins/aws/__init__.py +47 -7
- livekit/plugins/aws/experimental/realtime/__init__.py +11 -0
- livekit/plugins/aws/experimental/realtime/events.py +545 -0
- livekit/plugins/aws/experimental/realtime/pretty_printer.py +49 -0
- livekit/plugins/aws/experimental/realtime/realtime_model.py +2106 -0
- livekit/plugins/aws/experimental/realtime/turn_tracker.py +171 -0
- livekit/plugins/aws/experimental/realtime/types.py +38 -0
- livekit/plugins/aws/llm.py +109 -71
- livekit/plugins/aws/log.py +4 -0
- livekit/plugins/aws/models.py +4 -3
- livekit/plugins/aws/stt.py +214 -71
- livekit/plugins/aws/tts.py +96 -116
- livekit/plugins/aws/utils.py +29 -125
- livekit/plugins/aws/version.py +1 -1
- livekit_plugins_aws-1.3.9.dist-info/METADATA +385 -0
- livekit_plugins_aws-1.3.9.dist-info/RECORD +18 -0
- {livekit_plugins_aws-1.0.0rc6.dist-info → livekit_plugins_aws-1.3.9.dist-info}/WHEEL +1 -1
- livekit_plugins_aws-1.0.0rc6.dist-info/METADATA +0 -43
- livekit_plugins_aws-1.0.0rc6.dist-info/RECORD +0 -12
livekit/plugins/aws/utils.py
CHANGED
|
@@ -1,142 +1,46 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
|
-
import os
|
|
5
|
-
from typing import Any, cast
|
|
6
|
-
|
|
7
|
-
import boto3
|
|
8
|
-
|
|
9
3
|
from livekit.agents import llm
|
|
10
|
-
from livekit.agents.llm import
|
|
11
|
-
from livekit.agents.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
4
|
+
from livekit.agents.llm import FunctionTool, RawFunctionTool
|
|
5
|
+
from livekit.agents.llm.tool_context import (
|
|
6
|
+
get_raw_function_info,
|
|
7
|
+
is_function_tool,
|
|
8
|
+
is_raw_function_tool,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = ["to_fnc_ctx"]
|
|
16
12
|
DEFAULT_REGION = "us-east-1"
|
|
17
13
|
|
|
18
14
|
|
|
19
|
-
def
|
|
20
|
-
api_key: NotGivenOr[str],
|
|
21
|
-
api_secret: NotGivenOr[str],
|
|
22
|
-
region: NotGivenOr[str],
|
|
23
|
-
):
|
|
24
|
-
aws_region = region if is_given(region) else os.environ.get("AWS_DEFAULT_REGION")
|
|
25
|
-
if not aws_region:
|
|
26
|
-
aws_region = DEFAULT_REGION
|
|
27
|
-
|
|
28
|
-
if is_given(api_key) and is_given(api_secret):
|
|
29
|
-
session = boto3.Session(
|
|
30
|
-
aws_access_key_id=api_key,
|
|
31
|
-
aws_secret_access_key=api_secret,
|
|
32
|
-
region_name=aws_region,
|
|
33
|
-
)
|
|
34
|
-
else:
|
|
35
|
-
session = boto3.Session(region_name=aws_region)
|
|
36
|
-
|
|
37
|
-
credentials = session.get_credentials()
|
|
38
|
-
if not credentials or not credentials.access_key or not credentials.secret_key:
|
|
39
|
-
raise ValueError("No valid AWS credentials found.")
|
|
40
|
-
return cast(tuple[str, str, str], (credentials.access_key, credentials.secret_key, region))
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def to_fnc_ctx(fncs: list[FunctionTool]) -> list[dict]:
|
|
15
|
+
def to_fnc_ctx(fncs: list[FunctionTool | RawFunctionTool]) -> list[dict]:
|
|
44
16
|
return [_build_tool_spec(fnc) for fnc in fncs]
|
|
45
17
|
|
|
46
18
|
|
|
47
|
-
def
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
for msg in chat_ctx.items:
|
|
54
|
-
if msg.type == "message" and msg.role == "system":
|
|
55
|
-
for content in msg.content:
|
|
56
|
-
if content and isinstance(content, str):
|
|
57
|
-
system_message = {"text": content}
|
|
58
|
-
continue
|
|
59
|
-
|
|
60
|
-
if msg.type == "message":
|
|
61
|
-
role = "assistant" if msg.role == "assistant" else "user"
|
|
62
|
-
elif msg.type == "function_call":
|
|
63
|
-
role = "assistant"
|
|
64
|
-
elif msg.type == "function_call_output":
|
|
65
|
-
role = "user"
|
|
66
|
-
|
|
67
|
-
# if the effective role changed, finalize the previous turn.
|
|
68
|
-
if role != current_role:
|
|
69
|
-
if current_content and current_role is not None:
|
|
70
|
-
messages.append({"role": current_role, "content": current_content})
|
|
71
|
-
current_content = []
|
|
72
|
-
current_role = role
|
|
73
|
-
|
|
74
|
-
if msg.type == "message":
|
|
75
|
-
for content in msg.content:
|
|
76
|
-
if content and isinstance(content, str):
|
|
77
|
-
current_content.append({"text": content})
|
|
78
|
-
elif isinstance(content, ImageContent):
|
|
79
|
-
current_content.append(_build_image(content, cache_key))
|
|
80
|
-
elif msg.type == "function_call":
|
|
81
|
-
current_content.append(
|
|
19
|
+
def _build_tool_spec(function: FunctionTool | RawFunctionTool) -> dict:
|
|
20
|
+
if is_function_tool(function):
|
|
21
|
+
fnc = llm.utils.build_legacy_openai_schema(function, internally_tagged=True)
|
|
22
|
+
return {
|
|
23
|
+
"toolSpec": _strip_nones(
|
|
82
24
|
{
|
|
83
|
-
"
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
"input": json.loads(msg.arguments or "{}"),
|
|
87
|
-
}
|
|
25
|
+
"name": fnc["name"],
|
|
26
|
+
"description": fnc["description"] if fnc["description"] else None,
|
|
27
|
+
"inputSchema": {"json": fnc["parameters"] if fnc["parameters"] else {}},
|
|
88
28
|
}
|
|
89
29
|
)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
30
|
+
}
|
|
31
|
+
elif is_raw_function_tool(function):
|
|
32
|
+
info = get_raw_function_info(function)
|
|
33
|
+
return {
|
|
34
|
+
"toolSpec": _strip_nones(
|
|
35
|
+
{
|
|
36
|
+
"name": info.name,
|
|
37
|
+
"description": info.raw_schema.get("description", ""),
|
|
38
|
+
"inputSchema": {"json": info.raw_schema.get("parameters", {})},
|
|
96
39
|
}
|
|
97
|
-
|
|
98
|
-
if isinstance(msg.output, dict):
|
|
99
|
-
tool_response["toolResult"]["content"].append({"json": msg.output})
|
|
100
|
-
elif isinstance(msg.output, str):
|
|
101
|
-
tool_response["toolResult"]["content"].append({"text": msg.output})
|
|
102
|
-
current_content.append(tool_response)
|
|
103
|
-
|
|
104
|
-
# Finalize the last message if there’s any content left
|
|
105
|
-
if current_role is not None and current_content:
|
|
106
|
-
messages.append({"role": current_role, "content": current_content})
|
|
107
|
-
|
|
108
|
-
# Ensure the message list starts with a "user" message
|
|
109
|
-
if not messages or messages[0]["role"] != "user":
|
|
110
|
-
messages.insert(0, {"role": "user", "content": [{"text": "(empty)"}]})
|
|
111
|
-
|
|
112
|
-
return messages, system_message
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def _build_tool_spec(fnc: FunctionTool) -> dict:
|
|
116
|
-
fnc = llm.utils.build_legacy_openai_schema(fnc, internally_tagged=True)
|
|
117
|
-
return {
|
|
118
|
-
"toolSpec": _strip_nones(
|
|
119
|
-
{
|
|
120
|
-
"name": fnc["name"],
|
|
121
|
-
"description": fnc["description"] if fnc["description"] else None,
|
|
122
|
-
"inputSchema": {"json": fnc["parameters"] if fnc["parameters"] else {}},
|
|
123
|
-
}
|
|
124
|
-
)
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def _build_image(image: ImageContent, cache_key: Any) -> dict:
|
|
129
|
-
img = utils.serialize_image(image)
|
|
130
|
-
if img.external_url:
|
|
131
|
-
raise ValueError("external_url is not supported by AWS Bedrock.")
|
|
132
|
-
if cache_key not in image._cache:
|
|
133
|
-
image._cache[cache_key] = img.data_bytes
|
|
134
|
-
return {
|
|
135
|
-
"image": {
|
|
136
|
-
"format": "jpeg",
|
|
137
|
-
"source": {"bytes": image._cache[cache_key]},
|
|
40
|
+
)
|
|
138
41
|
}
|
|
139
|
-
|
|
42
|
+
else:
|
|
43
|
+
raise ValueError("Invalid function tool")
|
|
140
44
|
|
|
141
45
|
|
|
142
46
|
def _strip_nones(d: dict) -> dict:
|
livekit/plugins/aws/version.py
CHANGED
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: livekit-plugins-aws
|
|
3
|
+
Version: 1.3.9
|
|
4
|
+
Summary: LiveKit Agents Plugin for services from AWS
|
|
5
|
+
Project-URL: Documentation, https://docs.livekit.io
|
|
6
|
+
Project-URL: Website, https://livekit.io/
|
|
7
|
+
Project-URL: Source, https://github.com/livekit/agents
|
|
8
|
+
Author-email: LiveKit <hello@livekit.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: ai,audio,aws,livekit,nova,realtime,sonic,video,voice
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
19
|
+
Classifier: Topic :: Multimedia :: Video
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.9.0
|
|
22
|
+
Requires-Dist: aioboto3>=14.1.0
|
|
23
|
+
Requires-Dist: aws-sdk-transcribe-streaming>=0.2.0; python_version >= '3.12'
|
|
24
|
+
Requires-Dist: livekit-agents>=1.3.9
|
|
25
|
+
Provides-Extra: realtime
|
|
26
|
+
Requires-Dist: aws-sdk-bedrock-runtime>=0.2.0; (python_version >= '3.12') and extra == 'realtime'
|
|
27
|
+
Requires-Dist: aws-sdk-signers>=0.0.3; (python_version >= '3.12') and extra == 'realtime'
|
|
28
|
+
Requires-Dist: boto3>1.35.10; extra == 'realtime'
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
|
|
31
|
+
# AWS Plugin for LiveKit Agents
|
|
32
|
+
|
|
33
|
+
Complete AWS AI integration for LiveKit Agents, including Bedrock, Polly, Transcribe, and realtime voice-to-voice support for Amazon Nova 2 Sonic.
|
|
34
|
+
|
|
35
|
+
**What's included:**
|
|
36
|
+
- **RealtimeModel** - Amazon Nova Sonic 1.0 & 2.0 for speech-to-speech
|
|
37
|
+
- **LLM** - Powered by Amazon Bedrock, defaults to Nova 2 Lite
|
|
38
|
+
- **STT** - Powered by AWS Transcribe
|
|
39
|
+
- **TTS** - Powered by Amazon Polly
|
|
40
|
+
|
|
41
|
+
See [https://docs.livekit.io/agents/integrations/aws/](https://docs.livekit.io/agents/integrations/aws/) for more information.
|
|
42
|
+
|
|
43
|
+
## ⚠️ Breaking Change
|
|
44
|
+
|
|
45
|
+
**Default model changed to Nova 2.0 Sonic**: `RealtimeModel()` now defaults to `amazon.nova-2-sonic-v1:0` with `modalities="mixed"` (was `amazon.nova-sonic-v1:0` with `modalities="audio"`).
|
|
46
|
+
|
|
47
|
+
If you need the previous behavior, explicitly specify Nova 1.0 Sonic:
|
|
48
|
+
```python
|
|
49
|
+
model = aws.realtime.RealtimeModel.with_nova_sonic_1()
|
|
50
|
+
# or
|
|
51
|
+
model = aws.realtime.RealtimeModel(
|
|
52
|
+
model="amazon.nova-sonic-v1:0",
|
|
53
|
+
modalities="audio"
|
|
54
|
+
)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install livekit-plugins-aws
|
|
61
|
+
|
|
62
|
+
# For Nova Sonic realtime models
|
|
63
|
+
pip install livekit-plugins-aws[realtime]
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Prerequisites
|
|
67
|
+
|
|
68
|
+
### AWS Credentials
|
|
69
|
+
|
|
70
|
+
You'll need AWS credentials with access to Amazon Bedrock. Set them as environment variables:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
export AWS_ACCESS_KEY_ID=<your-access-key>
|
|
74
|
+
export AWS_SECRET_ACCESS_KEY=<your-secret-key>
|
|
75
|
+
export AWS_DEFAULT_REGION=us-east-1 # or your preferred region
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Getting Temporary Credentials from SSO (Local Testing)
|
|
79
|
+
|
|
80
|
+
If you use AWS SSO for authentication, get temporary credentials for local testing:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Login to your SSO profile
|
|
84
|
+
aws sso login --profile your-profile-name
|
|
85
|
+
|
|
86
|
+
# Export credentials from your SSO session
|
|
87
|
+
eval $(aws configure export-credentials --profile your-profile-name --format env)
|
|
88
|
+
|
|
89
|
+
# Verify credentials are set
|
|
90
|
+
aws sts get-caller-identity
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Alternatively, add this to your shell profile for automatic credential export:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Add to ~/.bashrc or ~/.zshrc
|
|
97
|
+
function aws-creds() {
|
|
98
|
+
eval $(aws configure export-credentials --profile $1 --format env)
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
# Usage: aws-creds your-profile-name
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Quick Start Example
|
|
105
|
+
|
|
106
|
+
The `realtime_joke_teller.py` example demonstrates both realtime and pipeline modes:
|
|
107
|
+
|
|
108
|
+
### Demonstrates Both Modes
|
|
109
|
+
- **Realtime mode**: Nova Sonic 2.0 for end-to-end speech-to-speech
|
|
110
|
+
- **Pipeline mode**: AWS Transcribe + Nova 2 Lite + Amazon Polly
|
|
111
|
+
|
|
112
|
+
### Demonstrates Nova 2 Sonic Capabilities
|
|
113
|
+
- **Text prompting**: Agent greets users first using `generate_reply()`
|
|
114
|
+
- **Multilingual support**: Automatic language detection and response in 8 languages
|
|
115
|
+
- **Multiple voices**: 16 expressive voices across languages
|
|
116
|
+
- **Function calling**: Weather lookup, web search, and joke telling
|
|
117
|
+
|
|
118
|
+
### Setup
|
|
119
|
+
|
|
120
|
+
1. **Install dependencies:**
|
|
121
|
+
```bash
|
|
122
|
+
pip install livekit-plugins-aws[realtime] \
|
|
123
|
+
livekit-plugins-silero \
|
|
124
|
+
jokeapi \
|
|
125
|
+
duckduckgo-search \
|
|
126
|
+
python-weather \
|
|
127
|
+
python-dotenv
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
2. **Copy the example locally:**
|
|
131
|
+
```bash
|
|
132
|
+
curl -O https://raw.githubusercontent.com/livekit/agents/main/examples/voice_agents/realtime_joke_teller.py
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
3. **Set up environment variables:**
|
|
136
|
+
```bash
|
|
137
|
+
# Create .env file
|
|
138
|
+
echo "AWS_DEFAULT_REGION=us-east-1" > .env
|
|
139
|
+
# Add your AWS credentials (see Prerequisites above)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
4. **(Optional) Run local LiveKit server:**
|
|
143
|
+
|
|
144
|
+
For testing without LiveKit Cloud, run a local server:
|
|
145
|
+
```bash
|
|
146
|
+
# Install LiveKit server
|
|
147
|
+
brew install livekit # macOS
|
|
148
|
+
# or download from https://github.com/livekit/livekit/releases
|
|
149
|
+
|
|
150
|
+
# Run in dev mode
|
|
151
|
+
livekit-server --dev
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Add to your `.env` file:
|
|
155
|
+
```bash
|
|
156
|
+
LIVEKIT_URL=ws://127.0.0.1:7880
|
|
157
|
+
LIVEKIT_API_KEY=devkey
|
|
158
|
+
LIVEKIT_API_SECRET=secret
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
See [self-hosting documentation](https://docs.livekit.io/home/self-hosting/local/) for more details.
|
|
162
|
+
|
|
163
|
+
### Running the Example
|
|
164
|
+
|
|
165
|
+
**Realtime Mode (Nova Sonic 2.0)** - Recommended for testing:
|
|
166
|
+
```bash
|
|
167
|
+
python realtime_joke_teller.py console
|
|
168
|
+
```
|
|
169
|
+
This runs locally using your computer's speakers and microphone. **Use a headset to prevent echo.**
|
|
170
|
+
|
|
171
|
+
**Multilingual Support:** Sonic automatically detects and responds in your language. Just start speaking in your preferred language (English, French, Italian, German, Spanish, Portuguese, or Hindi) and Sonic will respond in the same language!
|
|
172
|
+
|
|
173
|
+
**Pipeline Mode (Transcribe + Nova Lite + Polly)**:
|
|
174
|
+
```bash
|
|
175
|
+
python realtime_joke_teller.py console --mode pipeline
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
**Dev Mode** (connect to LiveKit room for remote testing):
|
|
179
|
+
```bash
|
|
180
|
+
python realtime_joke_teller.py dev
|
|
181
|
+
# or
|
|
182
|
+
python realtime_joke_teller.py dev --mode pipeline
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Try asking:
|
|
186
|
+
- "What's the weather in Seattle?"
|
|
187
|
+
- "Tell me a programming joke"
|
|
188
|
+
- "Search for information about my favorite movie, Short Circuit"
|
|
189
|
+
|
|
190
|
+
## Features
|
|
191
|
+
|
|
192
|
+
### Nova 2 Sonic Capabilities
|
|
193
|
+
|
|
194
|
+
Amazon Nova 2 Sonic is a unified speech-to-speech foundation model that delivers:
|
|
195
|
+
|
|
196
|
+
- **Realtime bidirectional streaming** - Low-latency, natural conversations
|
|
197
|
+
- **Multilingual support** - English, French, Italian, German, Spanish, Portuguese, Hindi
|
|
198
|
+
- **Automatic language mirroring** - Responds in the user's spoken language
|
|
199
|
+
- **Polyglot voices** - Matthew and Tiffany can seamlessly switch between languages within a single conversation, ideal for multilingual applications
|
|
200
|
+
- **16 expressive voices** - Multiple voices per language with natural prosody
|
|
201
|
+
- **Function calling** - Built-in tool use and agentic workflows
|
|
202
|
+
- **Interruption handling** - Graceful handling without losing context
|
|
203
|
+
- **Noise robustness** - Works in real-world environments
|
|
204
|
+
- **Text input support** (Nova 2.0 only) - Programmatic text prompting
|
|
205
|
+
|
|
206
|
+
### Model Selection
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from livekit.plugins import aws
|
|
210
|
+
|
|
211
|
+
# Nova Sonic 1.0 (audio-only, original model)
|
|
212
|
+
model = aws.realtime.RealtimeModel.with_nova_sonic_1()
|
|
213
|
+
|
|
214
|
+
# Nova Sonic 2.0 (audio + text input, latest)
|
|
215
|
+
model = aws.realtime.RealtimeModel.with_nova_sonic_2()
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Voice Selection
|
|
219
|
+
|
|
220
|
+
Voices are specified as lowercase strings. Import `SONIC1_VOICES` or `SONIC2_VOICES` type hints for IDE autocomplete.
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
from livekit.plugins.aws.experimental.realtime import SONIC2_VOICES
|
|
224
|
+
|
|
225
|
+
model = aws.realtime.RealtimeModel.with_nova_sonic_2(
|
|
226
|
+
voice="carolina" # Portuguese, feminine
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
#### Nova Sonic 1.0 Voices (11 voices)
|
|
231
|
+
|
|
232
|
+
- **English (US)**: `matthew`, `tiffany`
|
|
233
|
+
- **English (GB)**: `amy`
|
|
234
|
+
- **Spanish**: `lupe`, `carlos`
|
|
235
|
+
- **French**: `ambre`, `florian`
|
|
236
|
+
- **German**: `greta`, `lennart`
|
|
237
|
+
- **Italian**: `beatrice`, `lorenzo`
|
|
238
|
+
|
|
239
|
+
#### Nova Sonic 2.0 Voices (16 voices)
|
|
240
|
+
|
|
241
|
+
- **English (US)**: `matthew` (polyglot), `tiffany` (polyglot), `olivia`
|
|
242
|
+
- **English (GB)**: `amy`
|
|
243
|
+
- **Spanish**: `lupe`, `carlos`
|
|
244
|
+
- **French**: `ambre`, `florian`
|
|
245
|
+
- **German**: `tina`, `lennart`
|
|
246
|
+
- **Italian**: `beatrice`, `lorenzo`
|
|
247
|
+
- **Portuguese (Brazilian)**: `carolina`, `leo`
|
|
248
|
+
- **Hindi**: `arjun`, `kiara`
|
|
249
|
+
|
|
250
|
+
**Note**: Matthew and Tiffany in Nova 2.0 support polyglot mode, seamlessly switching between languages within a single conversation.
|
|
251
|
+
|
|
252
|
+
### Text Prompting with `generate_reply()`
|
|
253
|
+
|
|
254
|
+
Nova 2 Sonic supports programmatic text input to trigger agent responses:
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
class Assistant(Agent):
|
|
258
|
+
async def on_enter(self):
|
|
259
|
+
# Make the agent speak first with a greeting
|
|
260
|
+
await self.session.generate_reply(
|
|
261
|
+
instructions="Greet the user and introduce your capabilities"
|
|
262
|
+
)
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
#### `instructions` vs `user_input`
|
|
266
|
+
|
|
267
|
+
The `generate_reply()` method accepts two parameters with different behaviors:
|
|
268
|
+
|
|
269
|
+
**`instructions`** - System-level commands (recommended):
|
|
270
|
+
```python
|
|
271
|
+
await session.generate_reply(
|
|
272
|
+
instructions="Greet the user warmly and ask how you can help"
|
|
273
|
+
)
|
|
274
|
+
```
|
|
275
|
+
- Sent as a system prompt/command to the model
|
|
276
|
+
- Triggers immediate generation
|
|
277
|
+
- Does not appear in conversation history as user message
|
|
278
|
+
- Use for: Agent-initiated speech, prompting specific behaviors
|
|
279
|
+
|
|
280
|
+
**`user_input`** - Simulated user messages:
|
|
281
|
+
```python
|
|
282
|
+
await session.generate_reply(
|
|
283
|
+
user_input="Hello, I need help with my account"
|
|
284
|
+
)
|
|
285
|
+
```
|
|
286
|
+
- Sent as interactive USER role content
|
|
287
|
+
- Added to Nova's conversation context
|
|
288
|
+
- Triggers generation as if user spoke
|
|
289
|
+
- Use for: Testing, simulating user input, programmatic conversations
|
|
290
|
+
|
|
291
|
+
**When to use each:**
|
|
292
|
+
- **Agent greetings**: Use `instructions` - agent should speak without user input
|
|
293
|
+
- **Guided responses**: Use `instructions` - direct the agent's next action
|
|
294
|
+
- **Simulated conversations**: Use `user_input` - test multi-turn dialogs
|
|
295
|
+
- **Programmatic user input**: Use `user_input` - inject text as if user spoke
|
|
296
|
+
|
|
297
|
+
### Turn-Taking Sensitivity
|
|
298
|
+
|
|
299
|
+
Control how quickly the agent responds to pauses:
|
|
300
|
+
|
|
301
|
+
```python
|
|
302
|
+
model = aws.realtime.RealtimeModel.with_nova_sonic_2(
|
|
303
|
+
turn_detection="MEDIUM" # HIGH, MEDIUM (default), LOW
|
|
304
|
+
)
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
- **HIGH**: Fast responses, may interrupt slower speakers
|
|
308
|
+
- **MEDIUM**: Balanced, works for most use cases (recommended)
|
|
309
|
+
- **LOW**: Patient, better for thoughtful or hesitant speakers
|
|
310
|
+
|
|
311
|
+
### Complete Example
|
|
312
|
+
|
|
313
|
+
```python
|
|
314
|
+
from livekit import agents
|
|
315
|
+
from livekit.agents import Agent, AgentSession
|
|
316
|
+
from livekit.plugins import aws
|
|
317
|
+
from dotenv import load_dotenv
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
load_dotenv()
|
|
321
|
+
|
|
322
|
+
class Assistant(Agent):
|
|
323
|
+
def __init__(self):
|
|
324
|
+
super().__init__(
|
|
325
|
+
instructions="You are a helpful voice assistant powered by Amazon Nova Sonic 2.0."
|
|
326
|
+
)
|
|
327
|
+
|
|
328
|
+
async def on_enter(self):
|
|
329
|
+
await self.session.generate_reply(
|
|
330
|
+
instructions="Greet the user and offer assistance"
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
server = agents.AgentServer()
|
|
334
|
+
|
|
335
|
+
@server.rtc_session()
|
|
336
|
+
async def entrypoint(ctx: agents.JobContext):
|
|
337
|
+
await ctx.connect()
|
|
338
|
+
|
|
339
|
+
session = AgentSession(
|
|
340
|
+
llm=aws.realtime.RealtimeModel.with_nova_sonic_2(
|
|
341
|
+
voice="matthew",
|
|
342
|
+
turn_detection="MEDIUM",
|
|
343
|
+
tool_choice="auto"
|
|
344
|
+
)
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
await session.start(room=ctx.room, agent=Assistant())
|
|
348
|
+
|
|
349
|
+
if __name__ == "__main__":
|
|
350
|
+
agents.cli.run_app(server)
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
## Pipeline Mode (STT + LLM + TTS)
|
|
354
|
+
|
|
355
|
+
For more control over individual components, use pipeline mode:
|
|
356
|
+
|
|
357
|
+
```python
|
|
358
|
+
from livekit.plugins import aws, silero
|
|
359
|
+
|
|
360
|
+
session = AgentSession(
|
|
361
|
+
stt=aws.STT(), # AWS Transcribe
|
|
362
|
+
llm=aws.LLM(), # Nova 2 Lite (default)
|
|
363
|
+
tts=aws.TTS(), # Amazon Polly
|
|
364
|
+
vad=silero.VAD.load(),
|
|
365
|
+
)
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
### Nova 2 Lite
|
|
369
|
+
|
|
370
|
+
Amazon Nova 2 Lite is a fast, cost-effective reasoning model optimized for everyday AI workloads:
|
|
371
|
+
|
|
372
|
+
- **Lightning-fast processing** - Very low latency for real-time conversations
|
|
373
|
+
- **Cost-effective** - Industry-leading price-performance
|
|
374
|
+
- **Multimodal inputs** - Text and images ([source](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html))
|
|
375
|
+
- **1 million token context window** - Handle long conversations and complex context ([source](https://aws.amazon.com/blogs/aws/introducing-amazon-nova-2-lite-a-fast-cost-effective-reasoning-model/))
|
|
376
|
+
- **Agentic workflows** - RAG systems, function calling, tool use
|
|
377
|
+
- **Fine-tuning support** - Customize for your specific use case
|
|
378
|
+
|
|
379
|
+
Ideal for pipeline mode where you need fast, accurate LLM responses in voice applications.
|
|
380
|
+
|
|
381
|
+
## Resources
|
|
382
|
+
|
|
383
|
+
- [LiveKit Agents Documentation](https://docs.livekit.io/agents/)
|
|
384
|
+
- [AWS Nova Documentation](https://docs.aws.amazon.com/nova/latest/userguide/speech.html)
|
|
385
|
+
- [Example: realtime_joke_teller.py](https://github.com/livekit/agents/blob/main/examples/voice_agents/realtime_joke_teller.py)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
|
|
2
|
+
livekit/plugins/aws/llm.py,sha256=jupj91Yi-bxrgEi62JP6oIjjTzl4if5IOxR-8QVSvSM,13042
|
|
3
|
+
livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
|
|
4
|
+
livekit/plugins/aws/models.py,sha256=J4yzik9sR68RPZpR1ubRQ9hdn14D9IwA3KaRvAf5tAE,734
|
|
5
|
+
livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
livekit/plugins/aws/stt.py,sha256=2bcqwtC9MbHaLObx445-Vzj2-AstyGmAzbK2RNrbIy4,13584
|
|
7
|
+
livekit/plugins/aws/tts.py,sha256=oav-XWf9ysVGCmERWej6BgACu8vsLbRo9vFGpo9N6Ec,7184
|
|
8
|
+
livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
|
|
9
|
+
livekit/plugins/aws/version.py,sha256=Nr6Pqjd3G4o4xsBcOohDAA8MX74m67zVm0Ti65IKCsM,600
|
|
10
|
+
livekit/plugins/aws/experimental/realtime/__init__.py,sha256=w-Y5TEWOFGfHlTtlEhpvKPxaqbHE2hKGdSFvmzZs8uE,278
|
|
11
|
+
livekit/plugins/aws/experimental/realtime/events.py,sha256=2UQHHMgZZFCD3qFwpaj6SBuEKN1AlOhU9PBOaQeMSZU,16836
|
|
12
|
+
livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
|
|
13
|
+
livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=TQoAcMfgMjcWsDe4pC0DuqhMu_e62M5ai-vZ0w8Zt9A,97853
|
|
14
|
+
livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=kGk-2nZDZPviq_8AzrI6TlCqhWe8MGItBZf4fxl8Bbw,6354
|
|
15
|
+
livekit/plugins/aws/experimental/realtime/types.py,sha256=lEiIV1bAUn0CPQzRQaooPUrCCm2_I6ZKfGBWxQ_5HUc,1308
|
|
16
|
+
livekit_plugins_aws-1.3.9.dist-info/METADATA,sha256=luHDElv87SCHwAovRsQOmIk8E-5fTzlONMJ69m_VDqU,12741
|
|
17
|
+
livekit_plugins_aws-1.3.9.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
18
|
+
livekit_plugins_aws-1.3.9.dist-info/RECORD,,
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: livekit-plugins-aws
|
|
3
|
-
Version: 1.0.0rc6
|
|
4
|
-
Summary: LiveKit Agents Plugin for services from AWS
|
|
5
|
-
Project-URL: Documentation, https://docs.livekit.io
|
|
6
|
-
Project-URL: Website, https://livekit.io/
|
|
7
|
-
Project-URL: Source, https://github.com/livekit/agents
|
|
8
|
-
Author-email: LiveKit <hello@livekit.io>
|
|
9
|
-
License-Expression: Apache-2.0
|
|
10
|
-
Keywords: audio,aws,livekit,realtime,video,webrtc
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
-
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
18
|
-
Classifier: Topic :: Multimedia :: Video
|
|
19
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
-
Requires-Python: >=3.9.0
|
|
21
|
-
Requires-Dist: aiobotocore==2.19.0
|
|
22
|
-
Requires-Dist: amazon-transcribe>=0.6.2
|
|
23
|
-
Requires-Dist: boto3==1.36.3
|
|
24
|
-
Requires-Dist: livekit-agents>=1.0.0.rc6
|
|
25
|
-
Description-Content-Type: text/markdown
|
|
26
|
-
|
|
27
|
-
# LiveKit Plugins AWS
|
|
28
|
-
|
|
29
|
-
Agent Framework plugin for services from AWS.
|
|
30
|
-
|
|
31
|
-
- aws polly for tts
|
|
32
|
-
- aws transcribe for stt
|
|
33
|
-
- aws bedrock for llm
|
|
34
|
-
|
|
35
|
-
## Installation
|
|
36
|
-
|
|
37
|
-
```bash
|
|
38
|
-
pip install livekit-plugins-aws
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
## Pre-requisites
|
|
42
|
-
|
|
43
|
-
You'll need to specify an AWS Access Key and a Deployment Region. They can be set as environment variables: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` and `AWS_DEFAULT_REGION`, respectively.
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
livekit/plugins/aws/__init__.py,sha256=Ea-hK7QdutnwdZvvs9K2fiR8RWJqz2JcONxXnV1kXF0,977
|
|
2
|
-
livekit/plugins/aws/llm.py,sha256=rIji8jLIyV-xfrDzDewqQiHIzV-oZAE9Cj6p6bba6Nw,11353
|
|
3
|
-
livekit/plugins/aws/log.py,sha256=jFief0Xhv0n_F6sp6UFu9VKxs2bXNVGAfYGmEYfR_2Q,66
|
|
4
|
-
livekit/plugins/aws/models.py,sha256=Nf8RFmDulW7h03dG2lERTog3mgDK0TbLvW0eGOncuEE,704
|
|
5
|
-
livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
livekit/plugins/aws/stt.py,sha256=j2vEsoixFcsyTbCNhW5EWea5CEv3K-DzL9wmweebf3o,8030
|
|
7
|
-
livekit/plugins/aws/tts.py,sha256=4EOI4VabvhjCcDyp_QoDhlXOdrMrKpnmNIDTeoJ8kcU,7601
|
|
8
|
-
livekit/plugins/aws/utils.py,sha256=FBUOsVNC0J2-_bfdRMbAtny72Tqji3UnC0RZdMND7jo,5011
|
|
9
|
-
livekit/plugins/aws/version.py,sha256=PDOEKN5zsYLQdbfAm5Di6G1sFYANNA0LqJR8zZzeghg,604
|
|
10
|
-
livekit_plugins_aws-1.0.0rc6.dist-info/METADATA,sha256=H7G8v1bskBEj88pLijagWXR_uG-LwkUKnLwLs0R8JU8,1483
|
|
11
|
-
livekit_plugins_aws-1.0.0rc6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
-
livekit_plugins_aws-1.0.0rc6.dist-info/RECORD,,
|