livekit-plugins-aws 1.2.4__py3-none-any.whl → 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-aws might be problematic; consult the registry's advisory for this release for more details.

@@ -51,6 +51,8 @@ class _LLMOptions:
51
51
  max_output_tokens: NotGivenOr[int]
52
52
  top_p: NotGivenOr[float]
53
53
  additional_request_fields: NotGivenOr[dict[str, Any]]
54
+ cache_system: bool
55
+ cache_tools: bool
54
56
 
55
57
 
56
58
  class LLM(llm.LLM):
@@ -66,6 +68,8 @@ class LLM(llm.LLM):
66
68
  top_p: NotGivenOr[float] = NOT_GIVEN,
67
69
  tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
68
70
  additional_request_fields: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
71
+ cache_system: bool = False,
72
+ cache_tools: bool = False,
69
73
  session: aioboto3.Session | None = None,
70
74
  ) -> None:
71
75
  """
@@ -87,6 +91,8 @@ class LLM(llm.LLM):
87
91
  top_p (float, optional): The nucleus sampling probability for response generation. Defaults to None.
88
92
  tool_choice (ToolChoice, optional): Specifies whether to use tools during response generation. Defaults to "auto".
89
93
  additional_request_fields (dict[str, Any], optional): Additional request fields to send to the AWS Bedrock Converse API. Defaults to None.
94
+ cache_system (bool, optional): Caches system messages to reduce token usage. Defaults to False.
95
+ cache_tools (bool, optional): Caches tool definitions to reduce token usage. Defaults to False.
90
96
  session (aioboto3.Session, optional): Optional aioboto3 session to use.
91
97
  """ # noqa: E501
92
98
  super().__init__()
@@ -111,12 +117,18 @@ class LLM(llm.LLM):
111
117
  max_output_tokens=max_output_tokens,
112
118
  top_p=top_p,
113
119
  additional_request_fields=additional_request_fields,
120
+ cache_system=cache_system,
121
+ cache_tools=cache_tools,
114
122
  )
115
123
 
116
124
  @property
117
125
  def model(self) -> str:
118
126
  return self._opts.model
119
127
 
128
+ @property
129
+ def provider(self) -> str:
130
+ return "AWS Bedrock"
131
+
120
132
  def chat(
121
133
  self,
122
134
  *,
@@ -140,7 +152,11 @@ class LLM(llm.LLM):
140
152
  if not tools:
141
153
  return None
142
154
 
143
- tool_config: dict[str, Any] = {"tools": to_fnc_ctx(tools)}
155
+ tools_list = to_fnc_ctx(tools)
156
+ if self._opts.cache_tools:
157
+ tools_list.append({"cachePoint": {"type": "default"}})
158
+
159
+ tool_config: dict[str, Any] = {"tools": tools_list}
144
160
  tool_choice = (
145
161
  cast(ToolChoice, tool_choice) if is_given(tool_choice) else self._opts.tool_choice
146
162
  )
@@ -162,7 +178,12 @@ class LLM(llm.LLM):
162
178
  messages, extra_data = chat_ctx.to_provider_format(format="aws")
163
179
  opts["messages"] = messages
164
180
  if extra_data.system_messages:
165
- opts["system"] = [{"text": content} for content in extra_data.system_messages]
181
+ system_messages: list[dict[str, str | dict]] = [
182
+ {"text": content} for content in extra_data.system_messages
183
+ ]
184
+ if self._opts.cache_system:
185
+ system_messages.append({"cachePoint": {"type": "default"}})
186
+ opts["system"] = system_messages
166
187
 
167
188
  inference_config: dict[str, Any] = {}
168
189
  if is_given(self._opts.max_output_tokens):
@@ -46,3 +46,4 @@ TTSLanguages = Literal[
46
46
  ]
47
47
 
48
48
  TTSEncoding = Literal["mp3"]
49
+ TTSTextType = Literal["text", "ssml"]
@@ -16,10 +16,11 @@ import asyncio
16
16
  import os
17
17
  from dataclasses import dataclass
18
18
 
19
- from amazon_transcribe.auth import AwsCrtCredentialResolver
19
+ from amazon_transcribe.auth import AwsCrtCredentialResolver, CredentialResolver, Credentials
20
20
  from amazon_transcribe.client import TranscribeStreamingClient
21
21
  from amazon_transcribe.exceptions import BadRequestException
22
22
  from amazon_transcribe.model import Result, StartStreamTranscriptionEventStream, TranscriptEvent
23
+ from awscrt.auth import AwsCredentialsProvider # type: ignore[import-untyped]
23
24
 
24
25
  from livekit import rtc
25
26
  from livekit.agents import (
@@ -71,6 +72,7 @@ class STT(stt.STT):
71
72
  enable_partial_results_stabilization: NotGivenOr[bool] = NOT_GIVEN,
72
73
  partial_results_stability: NotGivenOr[str] = NOT_GIVEN,
73
74
  language_model_name: NotGivenOr[str] = NOT_GIVEN,
75
+ credentials: NotGivenOr[Credentials] = NOT_GIVEN,
74
76
  ):
75
77
  super().__init__(capabilities=stt.STTCapabilities(streaming=True, interim_results=True))
76
78
 
@@ -94,6 +96,20 @@ class STT(stt.STT):
94
96
  region=region,
95
97
  )
96
98
 
99
+ self._credentials = credentials if is_given(credentials) else None
100
+
101
+ @property
102
+ def model(self) -> str:
103
+ return (
104
+ self._config.language_model_name
105
+ if is_given(self._config.language_model_name)
106
+ else "unknown"
107
+ )
108
+
109
+ @property
110
+ def provider(self) -> str:
111
+ return "Amazon Transcribe"
112
+
97
113
  async def aclose(self) -> None:
98
114
  await super().aclose()
99
115
 
@@ -112,7 +128,9 @@ class STT(stt.STT):
112
128
  language: NotGivenOr[str] = NOT_GIVEN,
113
129
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
114
130
  ) -> SpeechStream:
115
- return SpeechStream(stt=self, conn_options=conn_options, opts=self._config)
131
+ return SpeechStream(
132
+ stt=self, conn_options=conn_options, opts=self._config, credentials=self._credentials
133
+ )
116
134
 
117
135
 
118
136
  class SpeechStream(stt.SpeechStream):
@@ -121,15 +139,37 @@ class SpeechStream(stt.SpeechStream):
121
139
  stt: STT,
122
140
  opts: STTOptions,
123
141
  conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
142
+ credentials: Credentials | None = None,
124
143
  ) -> None:
125
144
  super().__init__(stt=stt, conn_options=conn_options, sample_rate=opts.sample_rate)
126
145
  self._opts = opts
146
+ self._credentials = credentials
147
+
148
+ def _credential_resolver(self) -> CredentialResolver:
149
+ if self._credentials is None:
150
+ return AwsCrtCredentialResolver(None) # type: ignore
151
+
152
+ credentials = self._credentials
153
+
154
+ class CustomAwsCrtCredentialResolver(CredentialResolver):
155
+ def __init__(self) -> None:
156
+ self._crt_resolver = AwsCredentialsProvider.new_static(
157
+ credentials.access_key_id,
158
+ credentials.secret_access_key,
159
+ credentials.session_token,
160
+ )
161
+
162
+ async def get_credentials(self) -> Credentials | None:
163
+ credentials = await asyncio.wrap_future(self._crt_resolver.get_credentials())
164
+ return credentials # type: ignore[no-any-return]
165
+
166
+ return CustomAwsCrtCredentialResolver()
127
167
 
128
168
  async def _run(self) -> None:
129
169
  while True:
130
170
  client = TranscribeStreamingClient(
131
171
  region=self._opts.region,
132
- credential_resolver=AwsCrtCredentialResolver(None), # type: ignore
172
+ credential_resolver=self._credential_resolver(),
133
173
  )
134
174
 
135
175
  live_config = {
@@ -192,7 +232,7 @@ class SpeechStream(stt.SpeechStream):
192
232
  self._event_ch.send_nowait(
193
233
  stt.SpeechEvent(
194
234
  type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
195
- alternatives=[_streaming_recognize_response_to_speech_data(resp)],
235
+ alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
196
236
  )
197
237
  )
198
238
 
@@ -200,20 +240,22 @@ class SpeechStream(stt.SpeechStream):
200
240
  self._event_ch.send_nowait(
201
241
  stt.SpeechEvent(
202
242
  type=stt.SpeechEventType.FINAL_TRANSCRIPT,
203
- alternatives=[_streaming_recognize_response_to_speech_data(resp)],
243
+ alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
204
244
  )
205
245
  )
206
246
 
207
247
  if not resp.is_partial:
208
248
  self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
209
249
 
250
+ def _streaming_recognize_response_to_speech_data(self, resp: Result) -> stt.SpeechData:
251
+ confidence = 0.0
252
+ if resp.alternatives and (items := resp.alternatives[0].items):
253
+ confidence = items[0].confidence or 0.0
210
254
 
211
- def _streaming_recognize_response_to_speech_data(resp: Result) -> stt.SpeechData:
212
- data = stt.SpeechData(
213
- language="en-US",
214
- start_time=resp.start_time if resp.start_time else 0.0,
215
- end_time=resp.end_time if resp.end_time else 0.0,
216
- text=resp.alternatives[0].transcript if resp.alternatives else "",
217
- )
218
-
219
- return data
255
+ return stt.SpeechData(
256
+ language=resp.language_code or self._opts.language,
257
+ start_time=resp.start_time if resp.start_time is not None else 0.0,
258
+ end_time=resp.end_time if resp.end_time is not None else 0.0,
259
+ text=resp.alternatives[0].transcript if resp.alternatives else "",
260
+ confidence=confidence,
261
+ )
@@ -13,6 +13,7 @@
13
13
  from __future__ import annotations
14
14
 
15
15
  from dataclasses import dataclass, replace
16
+ from typing import cast
16
17
 
17
18
  import aioboto3 # type: ignore
18
19
  import botocore # type: ignore
@@ -32,11 +33,12 @@ from livekit.agents.types import (
32
33
  )
33
34
  from livekit.agents.utils import is_given
34
35
 
35
- from .models import TTSLanguages, TTSSpeechEngine
36
+ from .models import TTSLanguages, TTSSpeechEngine, TTSTextType
36
37
  from .utils import _strip_nones
37
38
 
38
39
  DEFAULT_SPEECH_ENGINE: TTSSpeechEngine = "generative"
39
40
  DEFAULT_VOICE = "Ruth"
41
+ DEFAULT_TEXT_TYPE: TTSTextType = "text"
40
42
 
41
43
 
42
44
  @dataclass
@@ -47,6 +49,7 @@ class _TTSOptions:
47
49
  region: str | None
48
50
  sample_rate: int
49
51
  language: TTSLanguages | str | None
52
+ text_type: TTSTextType
50
53
 
51
54
 
52
55
  class TTS(tts.TTS):
@@ -56,6 +59,7 @@ class TTS(tts.TTS):
56
59
  voice: str = "Ruth",
57
60
  language: NotGivenOr[TTSLanguages | str] = NOT_GIVEN,
58
61
  speech_engine: TTSSpeechEngine = "generative",
62
+ text_type: TTSTextType = "text",
59
63
  sample_rate: int = 16000,
60
64
  region: str | None = None,
61
65
  api_key: str | None = None,
@@ -71,10 +75,11 @@ class TTS(tts.TTS):
71
75
  See https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html for more details on the AWS Polly TTS.
72
76
 
73
77
  Args:
74
- Voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
75
- language (TTS_LANGUAGE, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
78
+ voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
79
+ language (TTSLanguages, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
80
+ speech_engine(TTSSpeechEngine, optional): The engine to use for the synthesis. Defaults to "generative".
81
+ text_type(TTSTextType, optional): Type of text to synthesize. Use "ssml" for SSML-enhanced text. Defaults to "text".
76
82
  sample_rate(int, optional): The audio frequency specified in Hz. Defaults to 16000.
77
- speech_engine(TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
78
83
  region(str, optional): The region to use for the synthesis. Defaults to "us-east-1".
79
84
  api_key(str, optional): AWS access key id.
80
85
  api_secret(str, optional): AWS secret access key.
@@ -96,16 +101,42 @@ class TTS(tts.TTS):
96
101
  self._opts = _TTSOptions(
97
102
  voice=voice,
98
103
  speech_engine=speech_engine,
104
+ text_type=text_type,
99
105
  region=region or None,
100
106
  language=language or None,
101
107
  sample_rate=sample_rate,
102
108
  )
103
109
 
110
+ @property
111
+ def model(self) -> str:
112
+ return self._opts.speech_engine
113
+
114
+ @property
115
+ def provider(self) -> str:
116
+ return "Amazon Polly"
117
+
104
118
  def synthesize(
105
119
  self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
106
120
  ) -> ChunkedStream:
107
121
  return ChunkedStream(tts=self, text=text, conn_options=conn_options)
108
122
 
123
+ def update_options(
124
+ self,
125
+ *,
126
+ voice: NotGivenOr[str] = NOT_GIVEN,
127
+ language: NotGivenOr[str] = NOT_GIVEN,
128
+ speech_engine: NotGivenOr[TTSSpeechEngine] = NOT_GIVEN,
129
+ text_type: NotGivenOr[TTSTextType] = NOT_GIVEN,
130
+ ) -> None:
131
+ if is_given(voice):
132
+ self._opts.voice = voice
133
+ if is_given(language):
134
+ self._opts.language = language
135
+ if is_given(speech_engine):
136
+ self._opts.speech_engine = cast(TTSSpeechEngine, speech_engine)
137
+ if is_given(text_type):
138
+ self._opts.text_type = cast(TTSTextType, text_type)
139
+
109
140
 
110
141
  class ChunkedStream(tts.ChunkedStream):
111
142
  def __init__(
@@ -130,7 +161,7 @@ class ChunkedStream(tts.ChunkedStream):
130
161
  "OutputFormat": "mp3",
131
162
  "Engine": self._opts.speech_engine,
132
163
  "VoiceId": self._opts.voice,
133
- "TextType": "text",
164
+ "TextType": self._opts.text_type,
134
165
  "SampleRate": str(self._opts.sample_rate),
135
166
  "LanguageCode": self._opts.language,
136
167
  }
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.4"
15
+ __version__ = "1.3.2"
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-aws
3
- Version: 1.2.4
3
+ Version: 1.3.2
4
4
  Summary: LiveKit Agents Plugin for services from AWS
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
7
7
  Project-URL: Source, https://github.com/livekit/agents
8
8
  Author-email: LiveKit <hello@livekit.io>
9
9
  License-Expression: Apache-2.0
10
- Keywords: audio,aws,livekit,nova,realtime,sonic,video,webrtc
10
+ Keywords: ai,audio,aws,livekit,nova,realtime,sonic,video,voice
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: License :: OSI Approved :: Apache Software License
13
13
  Classifier: Programming Language :: Python :: 3
@@ -19,10 +19,11 @@ Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
21
  Requires-Dist: aioboto3>=14.1.0
22
- Requires-Dist: amazon-transcribe>=0.6.2
23
- Requires-Dist: livekit-agents>=1.2.4
22
+ Requires-Dist: amazon-transcribe>=0.6.4
23
+ Requires-Dist: livekit-agents>=1.3.2
24
24
  Provides-Extra: realtime
25
25
  Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
26
+ Requires-Dist: aws-sdk-signers==0.0.3; (python_version >= '3.12') and extra == 'realtime'
26
27
  Requires-Dist: boto3>1.35.10; extra == 'realtime'
27
28
  Description-Content-Type: text/markdown
28
29
 
@@ -1,17 +1,17 @@
1
1
  livekit/plugins/aws/__init__.py,sha256=dCZISj1yZG0WZTojk3sU-Ub4PK1ThCVhamrl9k_NbBw,2047
2
- livekit/plugins/aws/llm.py,sha256=SUPWhJTbQ6HZJEK7WYUADDo2BJZJl2EaRvfG05IobzU,12150
2
+ livekit/plugins/aws/llm.py,sha256=9adQTcg3hJA6XTw4xaRjCIKkxedbzpNBBW0Yub9pkhA,13001
3
3
  livekit/plugins/aws/log.py,sha256=S5ICcsnwshZhMG0HPmc_lI3mtHmcY4oQMJBsnnho-bM,289
4
- livekit/plugins/aws/models.py,sha256=ztI9n3r8OHGlQewWXiyQP3SaavFKHkw-VVuK9Xz4K1I,696
4
+ livekit/plugins/aws/models.py,sha256=J4yzik9sR68RPZpR1ubRQ9hdn14D9IwA3KaRvAf5tAE,734
5
5
  livekit/plugins/aws/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- livekit/plugins/aws/stt.py,sha256=zlUrIVVYsSMhtVzXVxHTypW_E2YpMD9EAqvW3lZYj1c,8888
7
- livekit/plugins/aws/tts.py,sha256=T5dVpTuIuzQimYNnkfXi5dRLmRldWySL4IcbkXjmJLM,6083
6
+ livekit/plugins/aws/stt.py,sha256=WXE25wXwCgKeTcoMe3AH9EUeomvvRtPpOTZ5JAfOUxk,10629
7
+ livekit/plugins/aws/tts.py,sha256=oav-XWf9ysVGCmERWej6BgACu8vsLbRo9vFGpo9N6Ec,7184
8
8
  livekit/plugins/aws/utils.py,sha256=nA5Ua1f4T-25Loar6EvlrKTXI9N-zpTIH7cdQkwGyGI,1518
9
- livekit/plugins/aws/version.py,sha256=RNHljWBeimzzNkMMbX3wjVENjAQ3D1xYW3wp8ijSE3s,600
9
+ livekit/plugins/aws/version.py,sha256=fSSiY4SPcmDoVdQmTTNkDwxlIEOPaqT_xdow1m-W9JQ,600
10
10
  livekit/plugins/aws/experimental/realtime/__init__.py,sha256=mm_TGZc9QAWSO-VOO3PdE8Y5R6xlWckXRZuiFUIHa-Q,287
11
11
  livekit/plugins/aws/experimental/realtime/events.py,sha256=ltdGEipE3ZOkjn7K6rKN6WSCUPJkVg-S88mUmQ_V00s,15981
12
12
  livekit/plugins/aws/experimental/realtime/pretty_printer.py,sha256=KN7KPrfQu8cU7ff34vFAtfrd1umUSTVNKXQU7D8AMiM,1442
13
- livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=JkFv4LnlME17v-yXVvZiFdFyKHZBkKlOffmbUAd7qYw,60403
13
+ livekit/plugins/aws/experimental/realtime/realtime_model.py,sha256=ksdw7X-wm5wiDoCur9srYTRV2eVadHOjAIIanNS9dUo,64568
14
14
  livekit/plugins/aws/experimental/realtime/turn_tracker.py,sha256=bcufaap-coeIYuK3ct1Is9W_UoefGYRmnJu7Mn5DCYU,6002
15
- livekit_plugins_aws-1.2.4.dist-info/METADATA,sha256=ExAhD6Tb7l1DuATSpAUjHXECCXHEzrcA43oNlmPmrFQ,1989
16
- livekit_plugins_aws-1.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- livekit_plugins_aws-1.2.4.dist-info/RECORD,,
15
+ livekit_plugins_aws-1.3.2.dist-info/METADATA,sha256=BdW-6sTdtruSTfvIgdDU_m1VQb2A6E4z9HaNSeVPpNQ,2081
16
+ livekit_plugins_aws-1.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
+ livekit_plugins_aws-1.3.2.dist-info/RECORD,,