livekit-plugins-aws 1.2.5__tar.gz → 1.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-aws might be problematic. Click here for more details.

Files changed (18)
  1. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/PKG-INFO +3 -3
  2. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/models.py +1 -0
  3. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/stt.py +14 -12
  4. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/tts.py +10 -5
  5. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/version.py +1 -1
  6. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/pyproject.toml +2 -2
  7. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/.gitignore +0 -0
  8. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/README.md +0 -0
  9. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/__init__.py +0 -0
  10. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/experimental/realtime/__init__.py +0 -0
  11. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/experimental/realtime/events.py +0 -0
  12. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/experimental/realtime/pretty_printer.py +0 -0
  13. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/experimental/realtime/realtime_model.py +0 -0
  14. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/experimental/realtime/turn_tracker.py +0 -0
  15. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/llm.py +0 -0
  16. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/log.py +0 -0
  17. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/py.typed +0 -0
  18. {livekit_plugins_aws-1.2.5 → livekit_plugins_aws-1.2.7}/livekit/plugins/aws/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-aws
3
- Version: 1.2.5
3
+ Version: 1.2.7
4
4
  Summary: LiveKit Agents Plugin for services from AWS
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -19,8 +19,8 @@ Classifier: Topic :: Multimedia :: Video
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: >=3.9.0
21
21
  Requires-Dist: aioboto3>=14.1.0
22
- Requires-Dist: amazon-transcribe>=0.6.2
23
- Requires-Dist: livekit-agents>=1.2.5
22
+ Requires-Dist: amazon-transcribe>=0.6.4
23
+ Requires-Dist: livekit-agents>=1.2.7
24
24
  Provides-Extra: realtime
25
25
  Requires-Dist: aws-sdk-bedrock-runtime==0.0.2; (python_version >= '3.12') and extra == 'realtime'
26
26
  Requires-Dist: boto3>1.35.10; extra == 'realtime'
@@ -46,3 +46,4 @@ TTSLanguages = Literal[
46
46
  ]
47
47
 
48
48
  TTSEncoding = Literal["mp3"]
49
+ TTSTextType = Literal["text", "ssml"]
@@ -192,7 +192,7 @@ class SpeechStream(stt.SpeechStream):
192
192
  self._event_ch.send_nowait(
193
193
  stt.SpeechEvent(
194
194
  type=stt.SpeechEventType.INTERIM_TRANSCRIPT,
195
- alternatives=[_streaming_recognize_response_to_speech_data(resp)],
195
+ alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
196
196
  )
197
197
  )
198
198
 
@@ -200,20 +200,22 @@ class SpeechStream(stt.SpeechStream):
200
200
  self._event_ch.send_nowait(
201
201
  stt.SpeechEvent(
202
202
  type=stt.SpeechEventType.FINAL_TRANSCRIPT,
203
- alternatives=[_streaming_recognize_response_to_speech_data(resp)],
203
+ alternatives=[self._streaming_recognize_response_to_speech_data(resp)],
204
204
  )
205
205
  )
206
206
 
207
207
  if not resp.is_partial:
208
208
  self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
209
209
 
210
-
211
- def _streaming_recognize_response_to_speech_data(resp: Result) -> stt.SpeechData:
212
- data = stt.SpeechData(
213
- language="en-US",
214
- start_time=resp.start_time if resp.start_time else 0.0,
215
- end_time=resp.end_time if resp.end_time else 0.0,
216
- text=resp.alternatives[0].transcript if resp.alternatives else "",
217
- )
218
-
219
- return data
210
+ def _streaming_recognize_response_to_speech_data(self, resp: Result) -> stt.SpeechData:
211
+ confidence = 0.0
212
+ if resp.alternatives and (items := resp.alternatives[0].items):
213
+ confidence = items[0].confidence or 0.0
214
+
215
+ return stt.SpeechData(
216
+ language=resp.language_code or self._opts.language,
217
+ start_time=resp.start_time if resp.start_time is not None else 0.0,
218
+ end_time=resp.end_time if resp.end_time is not None else 0.0,
219
+ text=resp.alternatives[0].transcript if resp.alternatives else "",
220
+ confidence=confidence,
221
+ )
@@ -32,11 +32,12 @@ from livekit.agents.types import (
32
32
  )
33
33
  from livekit.agents.utils import is_given
34
34
 
35
- from .models import TTSLanguages, TTSSpeechEngine
35
+ from .models import TTSLanguages, TTSSpeechEngine, TTSTextType
36
36
  from .utils import _strip_nones
37
37
 
38
38
  DEFAULT_SPEECH_ENGINE: TTSSpeechEngine = "generative"
39
39
  DEFAULT_VOICE = "Ruth"
40
+ DEFAULT_TEXT_TYPE: TTSTextType = "text"
40
41
 
41
42
 
42
43
  @dataclass
@@ -47,6 +48,7 @@ class _TTSOptions:
47
48
  region: str | None
48
49
  sample_rate: int
49
50
  language: TTSLanguages | str | None
51
+ text_type: TTSTextType
50
52
 
51
53
 
52
54
  class TTS(tts.TTS):
@@ -56,6 +58,7 @@ class TTS(tts.TTS):
56
58
  voice: str = "Ruth",
57
59
  language: NotGivenOr[TTSLanguages | str] = NOT_GIVEN,
58
60
  speech_engine: TTSSpeechEngine = "generative",
61
+ text_type: TTSTextType = "text",
59
62
  sample_rate: int = 16000,
60
63
  region: str | None = None,
61
64
  api_key: str | None = None,
@@ -71,10 +74,11 @@ class TTS(tts.TTS):
71
74
  See https://docs.aws.amazon.com/polly/latest/dg/API_SynthesizeSpeech.html for more details on the AWS Polly TTS.
72
75
 
73
76
  Args:
74
- Voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
75
- language (TTS_LANGUAGE, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
77
+ voice (TTSModels, optional): Voice ID to use for the synthesis. Defaults to "Ruth".
78
+ language (TTSLanguages, optional): language code for the Synthesize Speech request. This is only necessary if using a bilingual voice, such as Aditi, which can be used for either Indian English (en-IN) or Hindi (hi-IN).
79
+ speech_engine(TTSSpeechEngine, optional): The engine to use for the synthesis. Defaults to "generative".
80
+ text_type(TTSTextType, optional): Type of text to synthesize. Use "ssml" for SSML-enhanced text. Defaults to "text".
76
81
  sample_rate(int, optional): The audio frequency specified in Hz. Defaults to 16000.
77
- speech_engine(TTS_SPEECH_ENGINE, optional): The engine to use for the synthesis. Defaults to "generative".
78
82
  region(str, optional): The region to use for the synthesis. Defaults to "us-east-1".
79
83
  api_key(str, optional): AWS access key id.
80
84
  api_secret(str, optional): AWS secret access key.
@@ -96,6 +100,7 @@ class TTS(tts.TTS):
96
100
  self._opts = _TTSOptions(
97
101
  voice=voice,
98
102
  speech_engine=speech_engine,
103
+ text_type=text_type,
99
104
  region=region or None,
100
105
  language=language or None,
101
106
  sample_rate=sample_rate,
@@ -130,7 +135,7 @@ class ChunkedStream(tts.ChunkedStream):
130
135
  "OutputFormat": "mp3",
131
136
  "Engine": self._opts.speech_engine,
132
137
  "VoiceId": self._opts.voice,
133
- "TextType": "text",
138
+ "TextType": self._opts.text_type,
134
139
  "SampleRate": str(self._opts.sample_rate),
135
140
  "LanguageCode": self._opts.language,
136
141
  }
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.5"
15
+ __version__ = "1.2.7"
@@ -23,9 +23,9 @@ classifiers = [
23
23
  "Programming Language :: Python :: 3 :: Only",
24
24
  ]
25
25
  dependencies = [
26
- "livekit-agents>=1.2.5",
26
+ "livekit-agents>=1.2.7",
27
27
  "aioboto3>=14.1.0",
28
- "amazon-transcribe>=0.6.2",
28
+ "amazon-transcribe>=0.6.4",
29
29
  ]
30
30
 
31
31
  [project.optional-dependencies]