livekit-plugins-hume 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-hume might be problematic. Click here for more details.
- livekit/plugins/hume/__init__.py +2 -6
- livekit/plugins/hume/tts.py +58 -72
- livekit/plugins/hume/version.py +1 -1
- {livekit_plugins_hume-1.0.17.dist-info → livekit_plugins_hume-1.0.19.dist-info}/METADATA +3 -3
- livekit_plugins_hume-1.0.19.dist-info/RECORD +9 -0
- livekit_plugins_hume-1.0.17.dist-info/RECORD +0 -9
- {livekit_plugins_hume-1.0.17.dist-info → livekit_plugins_hume-1.0.19.dist-info}/WHEEL +0 -0
livekit/plugins/hume/__init__.py
CHANGED
|
@@ -20,9 +20,7 @@ __version__ = "1.0.0"
|
|
|
20
20
|
from hume.tts import (
|
|
21
21
|
Format,
|
|
22
22
|
PostedContext,
|
|
23
|
-
|
|
24
|
-
PostedUtteranceVoiceWithId,
|
|
25
|
-
PostedUtteranceVoiceWithName,
|
|
23
|
+
PostedUtteranceVoice,
|
|
26
24
|
)
|
|
27
25
|
from livekit.agents import Plugin
|
|
28
26
|
|
|
@@ -32,10 +30,8 @@ from .tts import TTS
|
|
|
32
30
|
__all__ = [
|
|
33
31
|
"TTS",
|
|
34
32
|
"Format",
|
|
35
|
-
"PostedUtterance",
|
|
36
33
|
"PostedContext",
|
|
37
|
-
"
|
|
38
|
-
"PostedUtteranceVoiceWithId",
|
|
34
|
+
"PostedUtteranceVoice",
|
|
39
35
|
]
|
|
40
36
|
|
|
41
37
|
|
livekit/plugins/hume/tts.py
CHANGED
|
@@ -22,7 +22,7 @@ from dataclasses import dataclass
|
|
|
22
22
|
import aiohttp
|
|
23
23
|
|
|
24
24
|
from hume import AsyncHumeClient
|
|
25
|
-
from hume.tts import Format, FormatWav, PostedContext, PostedUtterance,
|
|
25
|
+
from hume.tts import Format, FormatWav, PostedContext, PostedUtterance, PostedUtteranceVoice
|
|
26
26
|
from livekit.agents import (
|
|
27
27
|
APIConnectionError,
|
|
28
28
|
APIConnectOptions,
|
|
@@ -39,31 +39,21 @@ from livekit.agents.types import (
|
|
|
39
39
|
from livekit.agents.utils import is_given
|
|
40
40
|
|
|
41
41
|
# Default audio settings
|
|
42
|
-
DEFAULT_SAMPLE_RATE =
|
|
42
|
+
DEFAULT_SAMPLE_RATE = 48000
|
|
43
43
|
DEFAULT_NUM_CHANNELS = 1
|
|
44
44
|
|
|
45
|
-
# Default TTS settings
|
|
46
|
-
DEFAULT_VOICE = PostedUtteranceVoiceWithName(name="Colton Rivers", provider="HUME_AI")
|
|
47
|
-
|
|
48
|
-
# text is required in PostedUtterance but it is declared as an empty string
|
|
49
|
-
# it will be overwritten when input tokens are received
|
|
50
|
-
DEFAULT_UTTERANCE = PostedUtterance(
|
|
51
|
-
voice=DEFAULT_VOICE, speed=1, trailing_silence=0.35, description="", text=""
|
|
52
|
-
)
|
|
53
|
-
|
|
54
45
|
|
|
55
46
|
@dataclass
|
|
56
47
|
class _TTSOptions:
|
|
57
48
|
"""TTS options for Hume API"""
|
|
58
49
|
|
|
59
50
|
api_key: str
|
|
60
|
-
|
|
51
|
+
voice: PostedUtteranceVoice | None
|
|
52
|
+
description: str | None
|
|
53
|
+
speed: float | None
|
|
61
54
|
context: PostedContext | None
|
|
62
55
|
format: Format
|
|
63
|
-
sample_rate: int
|
|
64
|
-
split_utterances: bool
|
|
65
56
|
strip_headers: bool
|
|
66
|
-
num_generations: int
|
|
67
57
|
instant_mode: bool
|
|
68
58
|
word_tokenizer: tokenize.WordTokenizer
|
|
69
59
|
|
|
@@ -72,35 +62,37 @@ class TTS(tts.TTS):
|
|
|
72
62
|
def __init__(
|
|
73
63
|
self,
|
|
74
64
|
*,
|
|
75
|
-
|
|
65
|
+
voice: NotGivenOr[PostedUtteranceVoice] = NOT_GIVEN,
|
|
66
|
+
description: NotGivenOr[str] = NOT_GIVEN,
|
|
67
|
+
speed: NotGivenOr[float] = NOT_GIVEN,
|
|
76
68
|
context: NotGivenOr[PostedContext] = NOT_GIVEN,
|
|
77
69
|
format: NotGivenOr[Format] = NOT_GIVEN,
|
|
78
|
-
split_utterances: bool = False,
|
|
79
|
-
num_generations: int = 1,
|
|
80
70
|
instant_mode: bool = False,
|
|
81
71
|
strip_headers: bool = True,
|
|
82
72
|
api_key: NotGivenOr[str] = NOT_GIVEN,
|
|
83
73
|
word_tokenizer: tokenize.WordTokenizer | None = None,
|
|
84
74
|
http_session: aiohttp.ClientSession | None = None,
|
|
85
|
-
sample_rate: int = 24000,
|
|
86
75
|
) -> None:
|
|
87
76
|
"""Initialize the Hume TTS client.
|
|
88
77
|
|
|
89
78
|
See https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming for API doc
|
|
90
79
|
|
|
91
80
|
Args:
|
|
92
|
-
|
|
93
|
-
|
|
81
|
+
voice (NotGivenOr[PostedUtteranceVoice]): The voice, specified by name or id, to be
|
|
82
|
+
used. When no voice is specified, a novel voice will be generated based on the
|
|
83
|
+
text and optionally provided description.
|
|
84
|
+
description (NotGivenOr[str]): Natural language instructions describing how the
|
|
85
|
+
synthesized speech should sound, including but not limited to tone, intonation,
|
|
86
|
+
pacing, and accent. If a Voice is specified in the request, this description
|
|
87
|
+
serves as acting instructions. If no Voice is specified, a new voice is generated
|
|
88
|
+
based on this description.
|
|
89
|
+
speed: (NotGivenOr[float]): Adjusts the relative speaking rate on a non-linear scale
|
|
90
|
+
from 0.25 (much slower) to 3.0 (much faster), where 1.0 represents normal speaking
|
|
91
|
+
pace.
|
|
94
92
|
context (NotGivenOr[PostedContext]): Utterances to use as context for generating
|
|
95
93
|
consistent speech style and prosody across multiple requests.
|
|
96
94
|
format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
|
|
97
95
|
Defaults to WAV format.
|
|
98
|
-
split_utterances (bool): Controls how audio output is segmented in the response.
|
|
99
|
-
When enabled (True), input utterances are split into natural-sounding segments.
|
|
100
|
-
When disabled (False), maintains one-to-one mapping between input and output.
|
|
101
|
-
Defaults to False.
|
|
102
|
-
num_generations (int): Number of generations of the audio to produce.
|
|
103
|
-
Must be between 1 and 5. Defaults to 1.
|
|
104
96
|
instant_mode (bool): Enables ultra-low latency streaming, reducing time to first chunk.
|
|
105
97
|
Recommended for real-time applications. Only for streaming endpoints.
|
|
106
98
|
With this enabled, requests incur 10% higher cost. Defaults to False.
|
|
@@ -113,14 +105,13 @@ class TTS(tts.TTS):
|
|
|
113
105
|
If None, a basic word tokenizer will be used.
|
|
114
106
|
http_session (aiohttp.ClientSession | None): Optional HTTP session for API requests.
|
|
115
107
|
If None, a new session will be created.
|
|
116
|
-
sample_rate (int): Audio sample rate in Hz. Defaults to 24000.
|
|
117
108
|
"""
|
|
118
109
|
|
|
119
110
|
super().__init__(
|
|
120
111
|
capabilities=tts.TTSCapabilities(
|
|
121
112
|
streaming=False,
|
|
122
113
|
),
|
|
123
|
-
sample_rate=
|
|
114
|
+
sample_rate=DEFAULT_SAMPLE_RATE,
|
|
124
115
|
num_channels=DEFAULT_NUM_CHANNELS,
|
|
125
116
|
)
|
|
126
117
|
|
|
@@ -134,15 +125,12 @@ class TTS(tts.TTS):
|
|
|
134
125
|
word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)
|
|
135
126
|
|
|
136
127
|
self._opts = _TTSOptions(
|
|
137
|
-
|
|
138
|
-
if is_given(
|
|
139
|
-
else
|
|
128
|
+
voice=voice if is_given(voice) else None,
|
|
129
|
+
description=description if is_given(description) else None,
|
|
130
|
+
speed=speed if is_given(speed) else None,
|
|
140
131
|
context=context if is_given(context) else None,
|
|
141
132
|
format=format if is_given(format) else FormatWav(),
|
|
142
133
|
api_key=self._api_key,
|
|
143
|
-
sample_rate=self.sample_rate,
|
|
144
|
-
split_utterances=split_utterances,
|
|
145
|
-
num_generations=num_generations,
|
|
146
134
|
strip_headers=strip_headers,
|
|
147
135
|
instant_mode=instant_mode,
|
|
148
136
|
word_tokenizer=word_tokenizer,
|
|
@@ -159,26 +147,31 @@ class TTS(tts.TTS):
|
|
|
159
147
|
def update_options(
|
|
160
148
|
self,
|
|
161
149
|
*,
|
|
162
|
-
|
|
150
|
+
voice: NotGivenOr[PostedUtteranceVoice] = NOT_GIVEN,
|
|
151
|
+
description: NotGivenOr[str] = NOT_GIVEN,
|
|
152
|
+
speed: NotGivenOr[float] = NOT_GIVEN,
|
|
163
153
|
context: NotGivenOr[PostedContext] = NOT_GIVEN,
|
|
164
154
|
format: NotGivenOr[Format] = NOT_GIVEN,
|
|
165
|
-
split_utterances: NotGivenOr[bool] = NOT_GIVEN,
|
|
166
|
-
num_generations: NotGivenOr[int] = NOT_GIVEN,
|
|
167
155
|
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
|
|
168
156
|
strip_headers: NotGivenOr[bool] = NOT_GIVEN,
|
|
169
157
|
) -> None:
|
|
170
158
|
"""Update TTS options for synthesizing speech.
|
|
171
159
|
|
|
172
160
|
Args:
|
|
173
|
-
|
|
174
|
-
|
|
161
|
+
voice (NotGivenOr[PostedUtteranceVoice]): The voice, specified by name or id, to be
|
|
162
|
+
used. When no voice is specified, a novel voice will be generated based on the
|
|
163
|
+
text and optionally provided description.
|
|
164
|
+
description (NotGivenOr[str]): Natural language instructions describing how the
|
|
165
|
+
synthesized speech should sound, including but not limited to tone, intonation,
|
|
166
|
+
pacing, and accent. If a Voice is specified in the request, this description
|
|
167
|
+
serves as acting instructions. If no Voice is specified, a new voice is generated
|
|
168
|
+
based on this description.
|
|
169
|
+
speed: (NotGivenOr[float]): Adjusts the relative speaking rate on a non-linear scale
|
|
170
|
+
from 0.25 (much slower) to 3.0 (much faster), where 1.0 represents normal speaking
|
|
171
|
+
pace.
|
|
175
172
|
context (Optional[PostedContext]): Utterances to use as context for generating
|
|
176
173
|
consistent speech style and prosody across multiple requests.
|
|
177
174
|
format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
|
|
178
|
-
split_utterances (NotGivenOr[bool]): Controls how audio output is segmented.
|
|
179
|
-
When True, utterances are split into natural-sounding segments.
|
|
180
|
-
When False, maintains one-to-one mapping between input and output.
|
|
181
|
-
num_generations (NotGivenOr[int]): Number of speech generations to produce (1-5).
|
|
182
175
|
instant_mode (NotGivenOr[bool]): Enables ultra-low latency streaming.
|
|
183
176
|
Reduces time to first audio chunk, recommended for real-time applications.
|
|
184
177
|
Note: Incurs 10% higher cost when enabled.
|
|
@@ -187,26 +180,16 @@ class TTS(tts.TTS):
|
|
|
187
180
|
If disabled, each chunk’s audio will be its own audio file, each with its headers.
|
|
188
181
|
"""
|
|
189
182
|
|
|
190
|
-
if is_given(
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
self._opts.
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
speed=utterance_options.speed if utterance_options.speed else 1,
|
|
197
|
-
trailing_silence=utterance_options.trailing_silence
|
|
198
|
-
if utterance_options.trailing_silence
|
|
199
|
-
else 0.35,
|
|
200
|
-
text="",
|
|
201
|
-
)
|
|
183
|
+
if is_given(voice):
|
|
184
|
+
self._opts.voice = voice
|
|
185
|
+
if is_given(description):
|
|
186
|
+
self._opts.description = description
|
|
187
|
+
if is_given(speed):
|
|
188
|
+
self._opts.speed = speed
|
|
202
189
|
if is_given(format):
|
|
203
190
|
self._opts.format = format
|
|
204
191
|
if is_given(context):
|
|
205
192
|
self._opts.context = context
|
|
206
|
-
if is_given(split_utterances):
|
|
207
|
-
self._opts.split_utterances = split_utterances
|
|
208
|
-
if is_given(num_generations):
|
|
209
|
-
self._opts.num_generations = num_generations
|
|
210
193
|
if is_given(instant_mode):
|
|
211
194
|
self._opts.instant_mode = instant_mode
|
|
212
195
|
if is_given(strip_headers):
|
|
@@ -245,7 +228,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
|
245
228
|
request_id = utils.shortuuid()
|
|
246
229
|
|
|
247
230
|
decoder = utils.codecs.AudioStreamDecoder(
|
|
248
|
-
sample_rate=
|
|
231
|
+
sample_rate=DEFAULT_SAMPLE_RATE,
|
|
249
232
|
num_channels=DEFAULT_NUM_CHANNELS,
|
|
250
233
|
)
|
|
251
234
|
|
|
@@ -254,21 +237,24 @@ class ChunkedStream(tts.ChunkedStream):
|
|
|
254
237
|
try:
|
|
255
238
|
|
|
256
239
|
async def _decode_loop():
|
|
240
|
+
utterance_options = {
|
|
241
|
+
"voice": self._opts.voice,
|
|
242
|
+
"description": self._opts.description,
|
|
243
|
+
"speed": self._opts.speed,
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
utterance_kwargs = {
|
|
247
|
+
"text": self._input_text,
|
|
248
|
+
**{k: v for k, v in utterance_options.items() if v is not None},
|
|
249
|
+
}
|
|
250
|
+
|
|
257
251
|
try:
|
|
252
|
+
utterance = PostedUtterance(**utterance_kwargs)
|
|
253
|
+
|
|
258
254
|
async for chunk in self._client.tts.synthesize_json_streaming(
|
|
259
|
-
utterances=[
|
|
260
|
-
PostedUtterance(
|
|
261
|
-
text=self._input_text,
|
|
262
|
-
description=self._opts.utterance_options.description,
|
|
263
|
-
voice=self._opts.utterance_options.voice,
|
|
264
|
-
speed=self._opts.utterance_options.speed,
|
|
265
|
-
trailing_silence=self._opts.utterance_options.trailing_silence,
|
|
266
|
-
)
|
|
267
|
-
],
|
|
255
|
+
utterances=[utterance],
|
|
268
256
|
context=self._opts.context,
|
|
269
257
|
format=self._opts.format,
|
|
270
|
-
num_generations=self._opts.num_generations,
|
|
271
|
-
split_utterances=self._opts.split_utterances,
|
|
272
258
|
instant_mode=self._opts.instant_mode,
|
|
273
259
|
strip_headers=self._opts.strip_headers,
|
|
274
260
|
):
|
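livekit/plugins/hume/version.py
CHANGED
|
(The +1 -1 hunk for version.py is missing from this capture; the hunk below belongs to METADATA.)
{livekit_plugins_hume-1.0.17.dist-info → livekit_plugins_hume-1.0.19.dist-info}/METADATA
CHANGED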
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: livekit-plugins-hume
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.19
|
|
4
4
|
Summary: Hume TTS plugin for LiveKit agents
|
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
|
6
6
|
Project-URL: Website, https://livekit.io/
|
|
@@ -17,8 +17,8 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
|
17
17
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
18
|
Requires-Python: >=3.9.0
|
|
19
19
|
Requires-Dist: aiohttp>=3.8.0
|
|
20
|
-
Requires-Dist: hume
|
|
21
|
-
Requires-Dist: livekit-agents>=1.0.
|
|
20
|
+
Requires-Dist: hume>=0.8.3
|
|
21
|
+
Requires-Dist: livekit-agents>=1.0.19
|
|
22
22
|
Description-Content-Type: text/markdown
|
|
23
23
|
|
|
24
24
|
# LiveKit Plugins Hume AI TTS
|
|
livekit_plugins_hume-1.0.19.dist-info/RECORD
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
livekit/plugins/hume/__init__.py,sha256=3pdWGwUA2DBleYqmHXrlicBv4QdBFlnMoNAQP8A5X-A,1236
|
|
2
|
+
livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
|
|
3
|
+
livekit/plugins/hume/models.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
|
5
|
+
livekit/plugins/hume/tts.py,sha256=JGUDNeVzPttrErCSP783bayByGBZraAHAZf23RgEz9A,11251
|
|
6
|
+
livekit/plugins/hume/version.py,sha256=KWKI0i88RsDNfgXXfCYo8ZHeBTwbIaCLBliemupGV-g,601
|
|
7
|
+
livekit_plugins_hume-1.0.19.dist-info/METADATA,sha256=Zub7XIe9XmmtnvOZajc7Q65QhB5J4A9zRJLFko2pWVE,1258
|
|
8
|
+
livekit_plugins_hume-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
+
livekit_plugins_hume-1.0.19.dist-info/RECORD,,
|
|
livekit_plugins_hume-1.0.17.dist-info/RECORD
REMOVED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
livekit/plugins/hume/__init__.py,sha256=CdEjcQRVL3dBso4xBL-zOgCESSqwH0Xdb01VT35P8u0,1362
|
|
2
|
-
livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
|
|
3
|
-
livekit/plugins/hume/models.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
|
5
|
-
livekit/plugins/hume/tts.py,sha256=aVlp-PebRsIily2mcsCewuZzcgHKwzbBSYwHcFnSo0w,12029
|
|
6
|
-
livekit/plugins/hume/version.py,sha256=oT9vgJC1WR2E9D9qKy-VZ5neWTTotVE-IZcSbmiQP98,601
|
|
7
|
-
livekit_plugins_hume-1.0.17.dist-info/METADATA,sha256=EpRs_Biq7BWbNk8P-COP1Sgm0LqZiMd6L1Zp--oDsN8,1251
|
|
8
|
-
livekit_plugins_hume-1.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
9
|
-
livekit_plugins_hume-1.0.17.dist-info/RECORD,,
|
|
{livekit_plugins_hume-1.0.17.dist-info → livekit_plugins_hume-1.0.19.dist-info}/WHEEL
File without changes
|