livekit-plugins-hume 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-hume might be problematic. Click here for more details.
- livekit/plugins/hume/__init__.py +16 -2
- livekit/plugins/hume/tts.py +147 -50
- livekit/plugins/hume/version.py +1 -1
- {livekit_plugins_hume-1.1.1.dist-info → livekit_plugins_hume-1.1.3.dist-info}/METADATA +2 -2
- livekit_plugins_hume-1.1.3.dist-info/RECORD +8 -0
- livekit_plugins_hume-1.1.1.dist-info/RECORD +0 -8
- {livekit_plugins_hume-1.1.1.dist-info → livekit_plugins_hume-1.1.3.dist-info}/WHEEL +0 -0
livekit/plugins/hume/__init__.py
CHANGED
|
@@ -21,10 +21,24 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
from livekit.agents import Plugin
|
|
23
23
|
|
|
24
|
-
from .tts import
|
|
24
|
+
from .tts import (
|
|
25
|
+
TTS,
|
|
26
|
+
AudioFormat,
|
|
27
|
+
Utterance,
|
|
28
|
+
VoiceById,
|
|
29
|
+
VoiceByName,
|
|
30
|
+
VoiceProvider,
|
|
31
|
+
)
|
|
25
32
|
from .version import __version__
|
|
26
33
|
|
|
27
|
-
__all__ = [
|
|
34
|
+
__all__ = [
|
|
35
|
+
"TTS",
|
|
36
|
+
"AudioFormat",
|
|
37
|
+
"VoiceById",
|
|
38
|
+
"VoiceByName",
|
|
39
|
+
"VoiceProvider",
|
|
40
|
+
"Utterance",
|
|
41
|
+
]
|
|
28
42
|
|
|
29
43
|
|
|
30
44
|
class HumeAIPlugin(Plugin):
|
livekit/plugins/hume/tts.py
CHANGED
|
@@ -19,6 +19,7 @@ import base64
|
|
|
19
19
|
import json
|
|
20
20
|
import os
|
|
21
21
|
from dataclasses import dataclass, replace
|
|
22
|
+
from enum import Enum
|
|
22
23
|
from typing import Any, TypedDict
|
|
23
24
|
|
|
24
25
|
import aiohttp
|
|
@@ -27,32 +28,66 @@ from livekit.agents import APIConnectionError, APIConnectOptions, APITimeoutErro
|
|
|
27
28
|
from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
|
|
28
29
|
from livekit.agents.utils import is_given
|
|
29
30
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
from .version import __version__
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class VoiceById(TypedDict, total=False):
|
|
35
|
+
id: str
|
|
36
|
+
provider: VoiceProvider | None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class VoiceByName(TypedDict, total=False):
|
|
40
|
+
name: str
|
|
41
|
+
provider: VoiceProvider | None
|
|
33
42
|
|
|
34
43
|
|
|
35
|
-
class
|
|
44
|
+
class Utterance(TypedDict, total=False):
|
|
45
|
+
"""Utterance for TTS synthesis."""
|
|
46
|
+
|
|
36
47
|
text: str
|
|
37
|
-
description: str
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
trailing_silence: float
|
|
48
|
+
description: str | None
|
|
49
|
+
speed: float | None
|
|
50
|
+
voice: VoiceById | VoiceByName | None
|
|
51
|
+
trailing_silence: float | None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class VoiceProvider(str, Enum):
|
|
55
|
+
"""Voice provider for the voice library."""
|
|
56
|
+
|
|
57
|
+
hume = "HUME_AI"
|
|
58
|
+
custom = "CUSTOM_VOICE"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class AudioFormat(str, Enum):
|
|
62
|
+
"""Audio format for the synthesized speech."""
|
|
41
63
|
|
|
64
|
+
mp3 = "mp3"
|
|
65
|
+
wav = "wav"
|
|
66
|
+
pcm = "pcm"
|
|
42
67
|
|
|
43
|
-
|
|
44
|
-
|
|
68
|
+
|
|
69
|
+
DEFAULT_HEADERS = {
|
|
70
|
+
"X-Hume-Client-Name": "livekit",
|
|
71
|
+
"X-Hume-Client-Version": __version__,
|
|
72
|
+
}
|
|
73
|
+
API_AUTH_HEADER = "X-Hume-Api-Key"
|
|
74
|
+
STREAM_PATH = "/v0/tts/stream/json"
|
|
75
|
+
DEFAULT_BASE_URL = "https://api.hume.ai"
|
|
76
|
+
SUPPORTED_SAMPLE_RATE = 48000
|
|
77
|
+
DEFAULT_VOICE = VoiceByName(name="Male English Actor", provider=VoiceProvider.hume)
|
|
45
78
|
|
|
46
79
|
|
|
47
80
|
@dataclass
|
|
48
81
|
class _TTSOptions:
|
|
49
82
|
api_key: str
|
|
50
|
-
utterance_options: PostedUtterance
|
|
51
|
-
context: PostedContext | None
|
|
52
|
-
sample_rate: int
|
|
53
|
-
split_utterances: bool
|
|
54
|
-
instant_mode: bool
|
|
55
83
|
base_url: str
|
|
84
|
+
voice: VoiceById | VoiceByName | None
|
|
85
|
+
description: str | None
|
|
86
|
+
speed: float | None
|
|
87
|
+
trailing_silence: float | None
|
|
88
|
+
context: str | list[Utterance] | None
|
|
89
|
+
instant_mode: bool | None
|
|
90
|
+
audio_format: AudioFormat
|
|
56
91
|
|
|
57
92
|
def http_url(self, path: str) -> str:
|
|
58
93
|
return f"{self.base_url}{path}"
|
|
@@ -63,36 +98,64 @@ class TTS(tts.TTS):
|
|
|
63
98
|
self,
|
|
64
99
|
*,
|
|
65
100
|
api_key: str | None = None,
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
101
|
+
voice: VoiceById | VoiceByName | None = DEFAULT_VOICE,
|
|
102
|
+
description: str | None = None,
|
|
103
|
+
speed: float | None = None,
|
|
104
|
+
trailing_silence: float | None = None,
|
|
105
|
+
context: str | list[Utterance] | None = None,
|
|
106
|
+
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
|
|
107
|
+
audio_format: AudioFormat = AudioFormat.mp3,
|
|
70
108
|
base_url: str = DEFAULT_BASE_URL,
|
|
71
109
|
http_session: aiohttp.ClientSession | None = None,
|
|
72
110
|
):
|
|
111
|
+
"""Initialize the Hume AI TTS client. Options will be used for all future synthesis
|
|
112
|
+
(until updated with update_options).
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
api_key: Hume AI API key. If not provided, will look for HUME_API_KEY environment
|
|
116
|
+
variable.
|
|
117
|
+
voice: A voice from the voice library specified by name or id.
|
|
118
|
+
description: Natural language instructions describing how the synthesized speech
|
|
119
|
+
should sound (≤1000 characters).
|
|
120
|
+
speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
|
|
121
|
+
trailing_silence: Duration of trailing silence (in seconds) to add to each utterance
|
|
122
|
+
(≥0, ≤5.0, default: 0.35).
|
|
123
|
+
context: Optional context for synthesis, either as text or list of utterances.
|
|
124
|
+
instant_mode: Whether to use instant mode. Defaults to True if voice specified,
|
|
125
|
+
False otherwise. Requires a voice to be specified when enabled.
|
|
126
|
+
audio_format: Output audio format (mp3, wav, or pcm). Defaults to mp3.
|
|
127
|
+
base_url: Base URL for Hume AI API. Defaults to https://api.hume.ai
|
|
128
|
+
http_session: Optional aiohttp ClientSession to use for requests.
|
|
129
|
+
"""
|
|
73
130
|
super().__init__(
|
|
74
|
-
capabilities=tts.TTSCapabilities(streaming=
|
|
75
|
-
sample_rate=
|
|
131
|
+
capabilities=tts.TTSCapabilities(streaming=False),
|
|
132
|
+
sample_rate=SUPPORTED_SAMPLE_RATE,
|
|
76
133
|
num_channels=1,
|
|
77
134
|
)
|
|
78
135
|
key = api_key or os.environ.get("HUME_API_KEY")
|
|
79
136
|
if not key:
|
|
80
137
|
raise ValueError("Hume API key is required via api_key or HUME_API_KEY env var")
|
|
81
138
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if is_given(
|
|
87
|
-
|
|
139
|
+
has_voice = voice is not None
|
|
140
|
+
|
|
141
|
+
# Default instant_mode is True if a voice is specified, otherwise False
|
|
142
|
+
# (Hume API requires a voice for instant mode)
|
|
143
|
+
if not is_given(instant_mode):
|
|
144
|
+
resolved_instant_mode = has_voice
|
|
145
|
+
elif instant_mode and not has_voice:
|
|
146
|
+
raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")
|
|
147
|
+
else:
|
|
148
|
+
resolved_instant_mode = instant_mode
|
|
88
149
|
|
|
89
150
|
self._opts = _TTSOptions(
|
|
90
151
|
api_key=key,
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
152
|
+
voice=voice,
|
|
153
|
+
description=description,
|
|
154
|
+
speed=speed,
|
|
155
|
+
trailing_silence=trailing_silence,
|
|
156
|
+
context=context,
|
|
157
|
+
instant_mode=resolved_instant_mode,
|
|
158
|
+
audio_format=audio_format,
|
|
96
159
|
base_url=base_url,
|
|
97
160
|
)
|
|
98
161
|
self._session = http_session
|
|
@@ -106,19 +169,40 @@ class TTS(tts.TTS):
|
|
|
106
169
|
def update_options(
|
|
107
170
|
self,
|
|
108
171
|
*,
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
172
|
+
description: NotGivenOr[str | None] = NOT_GIVEN,
|
|
173
|
+
speed: NotGivenOr[float | None] = NOT_GIVEN,
|
|
174
|
+
voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
|
|
175
|
+
trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
|
|
176
|
+
context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
|
|
112
177
|
instant_mode: NotGivenOr[bool] = NOT_GIVEN,
|
|
178
|
+
audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
|
|
113
179
|
) -> None:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
180
|
+
"""Update TTS options used for all future synthesis (until updated again)
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
voice: A voice from the voice library specified by name or id.
|
|
184
|
+
description: Natural language instructions describing how the synthesized speech
|
|
185
|
+
should sound (≤1000 characters).
|
|
186
|
+
speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
|
|
187
|
+
trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
|
|
188
|
+
context: Optional context for synthesis, either as text or list of utterances.
|
|
189
|
+
instant_mode: Whether to use instant mode.
|
|
190
|
+
audio_format: Output audio format (mp3, wav, or pcm).
|
|
191
|
+
"""
|
|
192
|
+
if is_given(description):
|
|
193
|
+
self._opts.description = description
|
|
194
|
+
if is_given(speed):
|
|
195
|
+
self._opts.speed = speed
|
|
196
|
+
if is_given(voice):
|
|
197
|
+
self._opts.voice = voice # type: ignore
|
|
198
|
+
if is_given(trailing_silence):
|
|
199
|
+
self._opts.trailing_silence = trailing_silence
|
|
200
|
+
if is_given(context):
|
|
201
|
+
self._opts.context = context # type: ignore
|
|
120
202
|
if is_given(instant_mode):
|
|
121
203
|
self._opts.instant_mode = instant_mode
|
|
204
|
+
if is_given(audio_format):
|
|
205
|
+
self._opts.audio_format = audio_format
|
|
122
206
|
|
|
123
207
|
def synthesize(
|
|
124
208
|
self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
|
|
@@ -133,34 +217,46 @@ class ChunkedStream(tts.ChunkedStream):
|
|
|
133
217
|
self._opts = replace(tts._opts)
|
|
134
218
|
|
|
135
219
|
async def _run(self, output_emitter: tts.AudioEmitter) -> None:
|
|
136
|
-
utterance:
|
|
137
|
-
|
|
220
|
+
utterance: Utterance = {
|
|
221
|
+
"text": self._input_text,
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
if self._opts.voice:
|
|
225
|
+
utterance["voice"] = self._opts.voice
|
|
226
|
+
if self._opts.description:
|
|
227
|
+
utterance["description"] = self._opts.description
|
|
228
|
+
if self._opts.speed:
|
|
229
|
+
utterance["speed"] = self._opts.speed
|
|
230
|
+
if self._opts.trailing_silence:
|
|
231
|
+
utterance["trailing_silence"] = self._opts.trailing_silence
|
|
138
232
|
|
|
139
233
|
payload: dict[str, Any] = {
|
|
140
234
|
"utterances": [utterance],
|
|
141
|
-
"split_utterances": self._opts.split_utterances,
|
|
142
235
|
"strip_headers": True,
|
|
143
236
|
"instant_mode": self._opts.instant_mode,
|
|
144
|
-
"format": {"type":
|
|
237
|
+
"format": {"type": self._opts.audio_format.value},
|
|
145
238
|
}
|
|
146
|
-
if self._opts.context:
|
|
147
|
-
payload["context"] = self._opts.context
|
|
239
|
+
if isinstance(self._opts.context, str):
|
|
240
|
+
payload["context"] = {"generation_id": self._opts.context}
|
|
241
|
+
elif isinstance(self._opts.context, list):
|
|
242
|
+
payload["context"] = {"utterances": self._opts.context}
|
|
148
243
|
|
|
149
244
|
try:
|
|
150
245
|
async with self._tts._ensure_session().post(
|
|
151
246
|
self._opts.http_url(STREAM_PATH),
|
|
152
|
-
headers={API_AUTH_HEADER: self._opts.api_key},
|
|
247
|
+
headers={**DEFAULT_HEADERS, API_AUTH_HEADER: self._opts.api_key},
|
|
153
248
|
json=payload,
|
|
154
249
|
timeout=aiohttp.ClientTimeout(total=None, sock_connect=self._conn_options.timeout),
|
|
155
250
|
# large read_bufsize to avoid `ValueError: Chunk too big`
|
|
156
251
|
read_bufsize=10 * 1024 * 1024,
|
|
157
252
|
) as resp:
|
|
158
253
|
resp.raise_for_status()
|
|
254
|
+
|
|
159
255
|
output_emitter.initialize(
|
|
160
256
|
request_id=utils.shortuuid(),
|
|
161
|
-
sample_rate=
|
|
257
|
+
sample_rate=SUPPORTED_SAMPLE_RATE,
|
|
162
258
|
num_channels=self._tts.num_channels,
|
|
163
|
-
mime_type="audio/
|
|
259
|
+
mime_type=f"audio/{self._opts.audio_format.value}",
|
|
164
260
|
)
|
|
165
261
|
|
|
166
262
|
async for raw_line in resp.content:
|
|
@@ -174,6 +270,7 @@ class ChunkedStream(tts.ChunkedStream):
|
|
|
174
270
|
output_emitter.push(base64.b64decode(audio_b64))
|
|
175
271
|
|
|
176
272
|
output_emitter.flush()
|
|
273
|
+
|
|
177
274
|
except asyncio.TimeoutError:
|
|
178
275
|
raise APITimeoutError() from None
|
|
179
276
|
except Exception as e:
|
livekit/plugins/hume/version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: livekit-plugins-hume
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.3
|
|
4
4
|
Summary: Hume TTS plugin for LiveKit agents
|
|
5
5
|
Project-URL: Documentation, https://docs.livekit.io
|
|
6
6
|
Project-URL: Website, https://livekit.io/
|
|
@@ -17,7 +17,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
|
17
17
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
18
|
Requires-Python: >=3.9.0
|
|
19
19
|
Requires-Dist: aiohttp>=3.8.0
|
|
20
|
-
Requires-Dist: livekit-agents>=1.1.
|
|
20
|
+
Requires-Dist: livekit-agents>=1.1.3
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
|
|
23
23
|
# Hume AI TTS plugin for LiveKit Agents
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
livekit/plugins/hume/__init__.py,sha256=yYTwSJaYq5ufZ_EnoSuLa2FfSsnOZu-swAzYjNQAhhw,1374
|
|
2
|
+
livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
|
|
3
|
+
livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
|
4
|
+
livekit/plugins/hume/tts.py,sha256=VYduFRxndfE0R-3A_Pt16pvcLd80VWnUJIda4iQBgPo,10301
|
|
5
|
+
livekit/plugins/hume/version.py,sha256=_-4Ui7Aa9dmOTog-I15Ct4mtOs5t7T2_Bi2bMdIRvcE,600
|
|
6
|
+
livekit_plugins_hume-1.1.3.dist-info/METADATA,sha256=mva7Jg6oH8gcaSLpz_gPo1aWY06gg99THwDhi2X5QpU,1354
|
|
7
|
+
livekit_plugins_hume-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
+
livekit_plugins_hume-1.1.3.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
livekit/plugins/hume/__init__.py,sha256=--F5e6CdoZM8eyw5ca-H-khoKdDJxdflwvrMCSwAHws,1250
|
|
2
|
-
livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
|
|
3
|
-
livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
|
|
4
|
-
livekit/plugins/hume/tts.py,sha256=ZnVqxzzs75OpHe_YDMr5X_BgZlZRlQSiCYs0z1Yq5gg,6128
|
|
5
|
-
livekit/plugins/hume/version.py,sha256=NTkUKR1fwMpJvRho7A_ZH0gQcK_2G7aizsjhjTXvZf0,600
|
|
6
|
-
livekit_plugins_hume-1.1.1.dist-info/METADATA,sha256=q4oiVeukrGm3GXaPrcjekgQ2j73uacW1WYfI2DcjXXo,1354
|
|
7
|
-
livekit_plugins_hume-1.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
-
livekit_plugins_hume-1.1.1.dist-info/RECORD,,
|
|
File without changes
|