livekit-plugins-hume 1.0.18__tar.gz → 1.0.20__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-hume might be problematic.
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/.gitignore +1 -0
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/PKG-INFO +6 -4
- livekit_plugins_hume-1.0.20/README.md +13 -0
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/__init__.py +16 -6
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/tts.py +58 -57
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/version.py +1 -1
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/pyproject.toml +1 -1
- livekit_plugins_hume-1.0.18/README.md +0 -11
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/log.py +0 -0
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/models.py +0 -0
- {livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/py.typed +0 -0
{livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-hume
-Version: 1.0.18
+Version: 1.0.20
 Summary: Hume TTS plugin for LiveKit agents
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -18,12 +18,14 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: aiohttp>=3.8.0
 Requires-Dist: hume>=0.8.3
-Requires-Dist: livekit-agents>=1.0.18
+Requires-Dist: livekit-agents>=1.0.20
 Description-Content-Type: text/markdown
 
-# LiveKit Plugins Hume AI TTS
+# Hume AI TTS plugin for LiveKit Agents
 
-LiveKit Agents Framework plugin for [Hume](https://www.hume.ai/) Text-to-Speech API.
+Support for text-to-speech with [Hume](https://www.hume.ai/).
+
+See [https://docs.livekit.io/agents/integrations/tts/hume/](https://docs.livekit.io/agents/integrations/tts/hume/) for more information.
 
 ## Installation
 
```
livekit_plugins_hume-1.0.20/README.md

```diff
@@ -0,0 +1,13 @@
+# Hume AI TTS plugin for LiveKit Agents
+
+Support for text-to-speech with [Hume](https://www.hume.ai/).
+
+See [https://docs.livekit.io/agents/integrations/tts/hume/](https://docs.livekit.io/agents/integrations/tts/hume/) for more information.
+
+## Installation
+
+```bash
+pip install livekit-plugins-hume
+```
+
+You will need an API Key from Hume, it can be set as an environment variable: `HUME_API_KEY`. You can get it from [here](https://platform.hume.ai/settings/keys)
```
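The README above stops at installation and the `HUME_API_KEY` environment variable, so here is a minimal usage sketch of the constructor options introduced in this release; the description and speed values are made up, and the API key is assumed to be picked up from `HUME_API_KEY` as the README states:

```python
from livekit.plugins import hume

# Construct the plugin's TTS with the options added in 1.0.20; with no api_key
# argument, the key is expected to come from the HUME_API_KEY environment variable.
tts = hume.TTS(
    description="A warm, unhurried narrator",  # acting instructions / voice design prompt
    speed=1.1,  # relative speaking rate; documented range is 0.25 to 3.0
)
```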
{livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/__init__.py
RENAMED
```diff
@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Hume AI TTS plugin for LiveKit Agents
+
+See https://docs.livekit.io/agents/integrations/tts/hume/ for more information.
+"""
+
 from __future__ import annotations
 
 __version__ = "1.0.0"
@@ -20,9 +25,7 @@ __version__ = "1.0.0"
 from hume.tts import (
     Format,
     PostedContext,
-    PostedUtterance,
-    PostedUtteranceVoiceWithId,
-    PostedUtteranceVoiceWithName,
+    PostedUtteranceVoice,
 )
 from livekit.agents import Plugin
 
@@ -32,10 +35,8 @@ from .tts import TTS
 __all__ = [
     "TTS",
     "Format",
-    "PostedUtterance",
     "PostedContext",
-    "PostedUtteranceVoiceWithName",
-    "PostedUtteranceVoiceWithId",
+    "PostedUtteranceVoice",
 ]
 
 
@@ -54,3 +55,12 @@ __pdoc__ = {}
 
 for n in NOT_IN_ALL:
     __pdoc__[n] = False
+
+# Cleanup docs of unexported modules
+_module = dir()
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+
+__pdoc__ = {}
+
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False
```
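Because the public exports change here, with `PostedUtteranceVoice` replacing the separate `PostedUtteranceVoiceWithId` and `PostedUtteranceVoiceWithName` re-exports, a brief sketch of selecting a voice under 1.0.20 follows. It assumes the hume SDK still ships the concrete name-addressed variant with a `name` field, as the 1.0.18 code suggests, and the voice name is hypothetical:

```python
from hume.tts import PostedUtteranceVoiceWithName  # concrete variant from the hume SDK (assumed)

from livekit.plugins.hume import TTS, PostedUtteranceVoice  # re-exported as of 1.0.20

# PostedUtteranceVoice is the general voice type accepted by TTS(voice=...);
# "My Saved Voice" is a hypothetical entry from your Hume voice library.
voice: PostedUtteranceVoice = PostedUtteranceVoiceWithName(name="My Saved Voice")
tts = TTS(voice=voice)
```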
{livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/tts.py

```diff
@@ -22,7 +22,7 @@ from dataclasses import dataclass
 import aiohttp
 
 from hume import AsyncHumeClient
-from hume.tts import Format, FormatWav, PostedContext, PostedUtterance
+from hume.tts import Format, FormatWav, PostedContext, PostedUtterance, PostedUtteranceVoice
 from livekit.agents import (
     APIConnectionError,
     APIConnectOptions,
@@ -39,25 +39,21 @@ from livekit.agents.types import (
 from livekit.agents.utils import is_given
 
 # Default audio settings
-DEFAULT_SAMPLE_RATE =
+DEFAULT_SAMPLE_RATE = 48000
 DEFAULT_NUM_CHANNELS = 1
 
-# Default TTS settings
-DEFAULT_UTTERANCE = PostedUtterance(text="")
-
 
 @dataclass
 class _TTSOptions:
     """TTS options for Hume API"""
 
     api_key: str
-    utterance_options: PostedUtterance
+    voice: PostedUtteranceVoice | None
+    description: str | None
+    speed: float | None
     context: PostedContext | None
     format: Format
-    sample_rate: int
-    split_utterances: bool
     strip_headers: bool
-    num_generations: int
     instant_mode: bool
     word_tokenizer: tokenize.WordTokenizer
 
@@ -66,35 +62,37 @@ class TTS(tts.TTS):
     def __init__(
         self,
         *,
-
+        voice: NotGivenOr[PostedUtteranceVoice] = NOT_GIVEN,
+        description: NotGivenOr[str] = NOT_GIVEN,
+        speed: NotGivenOr[float] = NOT_GIVEN,
         context: NotGivenOr[PostedContext] = NOT_GIVEN,
         format: NotGivenOr[Format] = NOT_GIVEN,
-        split_utterances: bool = False,
-        num_generations: int = 1,
         instant_mode: bool = False,
         strip_headers: bool = True,
         api_key: NotGivenOr[str] = NOT_GIVEN,
         word_tokenizer: tokenize.WordTokenizer | None = None,
         http_session: aiohttp.ClientSession | None = None,
-        sample_rate: int = 24000,
     ) -> None:
         """Initialize the Hume TTS client.
 
         See https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming for API doc
 
         Args:
-
-
+            voice (NotGivenOr[PostedUtteranceVoice]): The voice, specified by name or id, to be
+                used. When no voice is specified, a novel voice will be generated based on the
+                text and optionally provided description.
+            description (NotGivenOr[str]): Natural language instructions describing how the
+                synthesized speech should sound, including but not limited to tone, intonation,
+                pacing, and accent. If a Voice is specified in the request, this description
+                serves as acting instructions. If no Voice is specified, a new voice is generated
+                based on this description.
+            speed: (NotGivenOr[float]): Adjusts the relative speaking rate on a non-linear scale
+                from 0.25 (much slower) to 3.0 (much faster), where 1.0 represents normal speaking
+                pace.
             context (NotGivenOr[PostedContext]): Utterances to use as context for generating
                 consistent speech style and prosody across multiple requests.
             format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
                 Defaults to WAV format.
-            split_utterances (bool): Controls how audio output is segmented in the response.
-                When enabled (True), input utterances are split into natural-sounding segments.
-                When disabled (False), maintains one-to-one mapping between input and output.
-                Defaults to False.
-            num_generations (int): Number of generations of the audio to produce.
-                Must be between 1 and 5. Defaults to 1.
             instant_mode (bool): Enables ultra-low latency streaming, reducing time to first chunk.
                 Recommended for real-time applications. Only for streaming endpoints.
                 With this enabled, requests incur 10% higher cost. Defaults to False.
@@ -107,14 +105,13 @@ class TTS(tts.TTS):
                 If None, a basic word tokenizer will be used.
             http_session (aiohttp.ClientSession | None): Optional HTTP session for API requests.
                 If None, a new session will be created.
-            sample_rate (int): Audio sample rate in Hz. Defaults to 24000.
         """
 
         super().__init__(
             capabilities=tts.TTSCapabilities(
                 streaming=False,
             ),
-            sample_rate=
+            sample_rate=DEFAULT_SAMPLE_RATE,
             num_channels=DEFAULT_NUM_CHANNELS,
         )
 
@@ -128,15 +125,12 @@ class TTS(tts.TTS):
             word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)
 
         self._opts = _TTSOptions(
-
-            if is_given(
-            else
+            voice=voice if is_given(voice) else None,
+            description=description if is_given(description) else None,
+            speed=speed if is_given(speed) else None,
             context=context if is_given(context) else None,
             format=format if is_given(format) else FormatWav(),
             api_key=self._api_key,
-            sample_rate=self.sample_rate,
-            split_utterances=split_utterances,
-            num_generations=num_generations,
             strip_headers=strip_headers,
             instant_mode=instant_mode,
             word_tokenizer=word_tokenizer,
@@ -153,26 +147,31 @@
     def update_options(
         self,
         *,
-
+        voice: NotGivenOr[PostedUtteranceVoice] = NOT_GIVEN,
+        description: NotGivenOr[str] = NOT_GIVEN,
+        speed: NotGivenOr[float] = NOT_GIVEN,
         context: NotGivenOr[PostedContext] = NOT_GIVEN,
         format: NotGivenOr[Format] = NOT_GIVEN,
-        split_utterances: NotGivenOr[bool] = NOT_GIVEN,
-        num_generations: NotGivenOr[int] = NOT_GIVEN,
         instant_mode: NotGivenOr[bool] = NOT_GIVEN,
         strip_headers: NotGivenOr[bool] = NOT_GIVEN,
     ) -> None:
         """Update TTS options for synthesizing speech.
 
         Args:
-
-
+            voice (NotGivenOr[PostedUtteranceVoice]): The voice, specified by name or id, to be
+                used. When no voice is specified, a novel voice will be generated based on the
+                text and optionally provided description.
+            description (NotGivenOr[str]): Natural language instructions describing how the
+                synthesized speech should sound, including but not limited to tone, intonation,
+                pacing, and accent. If a Voice is specified in the request, this description
+                serves as acting instructions. If no Voice is specified, a new voice is generated
+                based on this description.
+            speed: (NotGivenOr[float]): Adjusts the relative speaking rate on a non-linear scale
+                from 0.25 (much slower) to 3.0 (much faster), where 1.0 represents normal speaking
+                pace.
             context (Optional[PostedContext]): Utterances to use as context for generating
                 consistent speech style and prosody across multiple requests.
             format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
-            split_utterances (NotGivenOr[bool]): Controls how audio output is segmented.
-                When True, utterances are split into natural-sounding segments.
-                When False, maintains one-to-one mapping between input and output.
-            num_generations (NotGivenOr[int]): Number of speech generations to produce (1-5).
             instant_mode (NotGivenOr[bool]): Enables ultra-low latency streaming.
                 Reduces time to first audio chunk, recommended for real-time applications.
                 Note: Incurs 10% higher cost when enabled.
@@ -181,16 +180,16 @@
             If disabled, each chunk’s audio will be its own audio file, each with its headers.
         """
 
-        if is_given(
-            self._opts.
+        if is_given(voice):
+            self._opts.voice = voice
+        if is_given(description):
+            self._opts.description = description
+        if is_given(speed):
+            self._opts.speed = speed
         if is_given(format):
             self._opts.format = format
         if is_given(context):
             self._opts.context = context
-        if is_given(split_utterances):
-            self._opts.split_utterances = split_utterances
-        if is_given(num_generations):
-            self._opts.num_generations = num_generations
         if is_given(instant_mode):
             self._opts.instant_mode = instant_mode
         if is_given(strip_headers):
```
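For context, this is how the setters above are typically exercised at runtime, assuming an existing `tts` instance from this plugin; the values are illustrative:

```python
# Retarget an existing TTS instance without recreating it; only the options passed
# here are overwritten, since each assignment above is guarded by is_given().
tts.update_options(
    description="Read this like a sports announcer",
    speed=1.3,
)
```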
```diff
@@ -229,7 +228,7 @@ class ChunkedStream(tts.ChunkedStream):
         request_id = utils.shortuuid()
 
         decoder = utils.codecs.AudioStreamDecoder(
-            sample_rate=
+            sample_rate=DEFAULT_SAMPLE_RATE,
             num_channels=DEFAULT_NUM_CHANNELS,
         )
 
@@ -238,22 +237,24 @@
         try:
 
             async def _decode_loop():
+                utterance_options = {
+                    "voice": self._opts.voice,
+                    "description": self._opts.description,
+                    "speed": self._opts.speed,
+                }
+
+                utterance_kwargs = {
+                    "text": self._input_text,
+                    **{k: v for k, v in utterance_options.items() if v is not None},
+                }
+
                 try:
+                    utterance = PostedUtterance(**utterance_kwargs)
+
                     async for chunk in self._client.tts.synthesize_json_streaming(
-                        utterances=[
-                            PostedUtterance(
-                                text=self._input_text,
-                                **{
-                                    k: v
-                                    for k, v in self._opts.utterance_options.__dict__.items()
-                                    if v is not None and k != "text"
-                                },
-                            )
-                        ],
+                        utterances=[utterance],
                         context=self._opts.context,
                         format=self._opts.format,
-                        num_generations=self._opts.num_generations,
-                        split_utterances=self._opts.split_utterances,
                         instant_mode=self._opts.instant_mode,
                         strip_headers=self._opts.strip_headers,
                     ):
```
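As a standalone illustration of the filtering above: options left unset stay `None` and are dropped before the request, so `PostedUtterance` only receives fields the caller actually configured. The values below are made up and assume the `hume` SDK is installed:

```python
from hume.tts import PostedUtterance

# Mirrors the pattern in _decode_loop: drop None-valued options before building
# the utterance, so no explicit nulls are sent to the Hume API.
utterance_options = {"voice": None, "description": "calm and unhurried", "speed": None}
utterance_kwargs = {
    "text": "Hello from LiveKit!",
    **{k: v for k, v in utterance_options.items() if v is not None},
}
utterance = PostedUtterance(**utterance_kwargs)  # equivalent to text + description only
```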
livekit_plugins_hume-1.0.18/README.md

```diff
@@ -1,11 +0,0 @@
-# LiveKit Plugins Hume AI TTS
-
-LiveKit Agents Framework plugin for [Hume](https://www.hume.ai/) Text-to-Speech API.
-
-## Installation
-
-```bash
-pip install livekit-plugins-hume
-```
-
-You will need an API Key from Hume, it can be set as an environment variable: `HUME_API_KEY`. You can get it from [here](https://platform.hume.ai/settings/keys)
```
{livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/log.py: File without changes
{livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/models.py: File without changes
{livekit_plugins_hume-1.0.18 → livekit_plugins_hume-1.0.20}/livekit/plugins/hume/py.typed: File without changes