livekit-plugins-hume 1.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-hume might be problematic. Click here for more details.

@@ -0,0 +1,56 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
# The package version is defined once in version.py; importing it here keeps
# the version reported at plugin registration in step with the released wheel
# instead of a hard-coded literal that drifts.
from .version import __version__
18
+
19
+ # make imports available
20
+ from hume.tts import (
21
+ Format,
22
+ PostedContext,
23
+ PostedUtterance,
24
+ PostedUtteranceVoiceWithId,
25
+ PostedUtteranceVoiceWithName,
26
+ )
27
+ from livekit.agents import Plugin
28
+
29
+ from .tts import TTS
30
+
31
+ # all exports
32
+ __all__ = [
33
+ "TTS",
34
+ "Format",
35
+ "PostedUtterance",
36
+ "PostedContext",
37
+ "PostedUtteranceVoiceWithName",
38
+ "PostedUtteranceVoiceWithId",
39
+ ]
40
+
41
+
42
class HumeAIPlugin(Plugin):
    """Plugin descriptor for the Hume integration.

    The plugin is identified by this module's name, version and package.
    """

    def __init__(self) -> None:
        plugin_name = __name__
        plugin_version = __version__
        plugin_package = __package__
        super().__init__(plugin_name, plugin_version, plugin_package)
45
+
46
+
47
Plugin.register_plugin(HumeAIPlugin())

# Hide from pdoc every module-level name that is not listed in __all__.
# The snapshot of dir() is taken first so that names introduced below
# (NOT_IN_ALL, __pdoc__, the loop variable) are not part of the listing.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__.update({n: False})
@@ -0,0 +1,3 @@
1
"""Logger for the Hume plugin package."""

import logging

# Named explicitly rather than via __name__ so that the logger is
# "livekit.plugins.hume" regardless of which module imports it.
_LOGGER_NAME = "livekit.plugins.hume"

logger = logging.getLogger(_LOGGER_NAME)
File without changes
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,297 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import base64
19
+ import os
20
+ from dataclasses import dataclass
21
+
22
+ import aiohttp
23
+
24
+ from hume import AsyncHumeClient
25
+ from hume.tts import Format, FormatWav, PostedContext, PostedUtterance, PostedUtteranceVoiceWithName
26
+ from livekit.agents import (
27
+ APIConnectionError,
28
+ APIConnectOptions,
29
+ APITimeoutError,
30
+ tokenize,
31
+ tts,
32
+ utils,
33
+ )
34
+ from livekit.agents.types import (
35
+ DEFAULT_API_CONNECT_OPTIONS,
36
+ NOT_GIVEN,
37
+ NotGivenOr,
38
+ )
39
+ from livekit.agents.utils import is_given
40
+
41
# Audio defaults
DEFAULT_SAMPLE_RATE = 24000
DEFAULT_NUM_CHANNELS = 1

# Voice used when an utterance does not specify one
DEFAULT_VOICE = PostedUtteranceVoiceWithName(provider="HUME_AI", name="Colton Rivers")

# PostedUtterance requires `text`, so the template carries an empty string;
# the actual text is filled in when an utterance is built for a request.
DEFAULT_UTTERANCE = PostedUtterance(
    text="",
    description="",
    voice=DEFAULT_VOICE,
    speed=1,
    trailing_silence=0.35,
)
53
+
54
+
55
@dataclass
class _TTSOptions:
    """Settings the Hume TTS plugin keeps between requests."""

    # --- authentication ---
    api_key: str

    # --- utterance content / style ---
    # Template whose description, voice, speed and trailing_silence are copied
    # into each request; its `text` is a placeholder.
    utterance_options: PostedUtterance
    # Optional context passed along with each synthesis request.
    context: PostedContext | None

    # --- audio output ---
    format: Format
    sample_rate: int

    # --- request flags forwarded to the Hume API ---
    split_utterances: bool
    strip_headers: bool
    num_generations: int
    instant_mode: bool

    # --- text processing ---
    word_tokenizer: tokenize.WordTokenizer
69
+
70
+
71
class TTS(tts.TTS):
    """Hume text-to-speech plugin offering chunked (non-streaming-input) synthesis."""

    def __init__(
        self,
        *,
        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
        context: NotGivenOr[PostedContext] = NOT_GIVEN,
        format: NotGivenOr[Format] = NOT_GIVEN,
        split_utterances: bool = False,
        num_generations: int = 1,
        instant_mode: bool = False,
        strip_headers: bool = True,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        word_tokenizer: tokenize.WordTokenizer | None = None,
        http_session: aiohttp.ClientSession | None = None,
        sample_rate: int = DEFAULT_SAMPLE_RATE,
    ) -> None:
        """Initialize the Hume TTS client.

        See https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming for API doc

        Args:
            utterance_options (NotGivenOr[PostedUtterance]): Default options for utterances,
                including description, voice, and delivery controls.
            context (NotGivenOr[PostedContext]): Utterances to use as context for generating
                consistent speech style and prosody across multiple requests.
            format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
                Defaults to WAV format.
            split_utterances (bool): Controls how audio output is segmented in the response.
                When enabled (True), input utterances are split into natural-sounding segments.
                When disabled (False), maintains one-to-one mapping between input and output.
                Defaults to False.
            num_generations (int): Number of generations of the audio to produce.
                Must be between 1 and 5. Defaults to 1.
            instant_mode (bool): Enables ultra-low latency streaming, reducing time to first chunk.
                Recommended for real-time applications. Only for streaming endpoints.
                With this enabled, requests incur 10% higher cost. Defaults to False.
            strip_headers (bool): If enabled, the audio for all the chunks of a generation,
                once concatenated together, will constitute a single audio file.
                If disabled, each chunk's audio will be its own audio file, each with its headers.
                Defaults to True.
            api_key (NotGivenOr[str]): Hume API key for authentication. If not provided,
                will attempt to read from HUME_API_KEY environment variable.
            word_tokenizer (tokenize.WordTokenizer | None): Custom word tokenizer to use for text.
                If None, a basic word tokenizer will be used.
            http_session (aiohttp.ClientSession | None): Optional HTTP session, returned by
                `_ensure_session`. If None, a session is obtained lazily from the
                http context.
            sample_rate (int): Audio sample rate in Hz. Defaults to 24000.

        Raises:
            ValueError: If no API key is given and HUME_API_KEY is not set.
        """

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=False,
            ),
            sample_rate=sample_rate,
            num_channels=DEFAULT_NUM_CHANNELS,
        )

        self._api_key = api_key if is_given(api_key) else os.environ.get("HUME_API_KEY")
        if not self._api_key:
            raise ValueError(
                "Hume API key is required, either as argument or set HUME_API_KEY env variable"
            )

        if not word_tokenizer:
            word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)

        self._opts = _TTSOptions(
            utterance_options=utterance_options
            if is_given(utterance_options)
            else DEFAULT_UTTERANCE,
            context=context if is_given(context) else None,
            format=format if is_given(format) else FormatWav(),
            api_key=self._api_key,
            sample_rate=self.sample_rate,
            split_utterances=split_utterances,
            num_generations=num_generations,
            strip_headers=strip_headers,
            instant_mode=instant_mode,
            word_tokenizer=word_tokenizer,
        )

        self._client = AsyncHumeClient(api_key=self._api_key)
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, fetching one from the http context on first use."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def update_options(
        self,
        *,
        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
        context: NotGivenOr[PostedContext] = NOT_GIVEN,
        format: NotGivenOr[Format] = NOT_GIVEN,
        split_utterances: NotGivenOr[bool] = NOT_GIVEN,
        num_generations: NotGivenOr[int] = NOT_GIVEN,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        strip_headers: NotGivenOr[bool] = NOT_GIVEN,
    ) -> None:
        """Update TTS options for synthesizing speech.

        Only the arguments that are given are changed; everything else keeps
        its current value.

        Args:
            utterance_options (NotGivenOr[PostedUtterance]): Options for utterances,
                including description, voice, and additional controls. Its `text`
                is ignored; unset fields fall back to the plugin defaults.
            context (NotGivenOr[PostedContext]): Utterances to use as context for generating
                consistent speech style and prosody across multiple requests.
            format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
            split_utterances (NotGivenOr[bool]): Controls how audio output is segmented.
                When True, utterances are split into natural-sounding segments.
                When False, maintains one-to-one mapping between input and output.
            num_generations (NotGivenOr[int]): Number of speech generations to produce (1-5).
            instant_mode (NotGivenOr[bool]): Enables ultra-low latency streaming.
                Reduces time to first audio chunk, recommended for real-time applications.
                Note: Incurs 10% higher cost when enabled.
            strip_headers (NotGivenOr[bool]): If enabled, the audio for the chunks of a
                generation, once concatenated together, will constitute a single audio file.
                If disabled, each chunk's audio will be its own audio file, each with its headers.
        """

        if is_given(utterance_options):
            trailing_silence = utterance_options.trailing_silence
            # text is required in PostedUtterance but it is declared as an empty string
            # it will be overwritten when input tokens are received
            self._opts.utterance_options = PostedUtterance(
                description=utterance_options.description or "",
                voice=utterance_options.voice or DEFAULT_VOICE,
                speed=utterance_options.speed or 1,
                # An explicit 0 means "no trailing silence" and must be kept;
                # only an unset value falls back to the default.
                trailing_silence=0.35 if trailing_silence is None else trailing_silence,
                text="",
            )
        if is_given(format):
            self._opts.format = format
        if is_given(context):
            self._opts.context = context
        if is_given(split_utterances):
            self._opts.split_utterances = split_utterances
        if is_given(num_generations):
            self._opts.num_generations = num_generations
        if is_given(instant_mode):
            self._opts.instant_mode = instant_mode
        if is_given(strip_headers):
            self._opts.strip_headers = strip_headers

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        """Create a ChunkedStream that synthesizes `text` with the current options.

        Args:
            text (str): Text to convert to speech.
            conn_options (APIConnectOptions): Connection options handed to the stream.

        Returns:
            ChunkedStream: Stream bound to this TTS instance and its options.
        """
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
            opts=self._opts,
        )
227
+
228
+
229
class ChunkedStream(tts.ChunkedStream):
    """Stream for Hume TTS JSON streaming API."""

    def __init__(
        self,
        *,
        tts: TTS,
        input_text: str,
        opts: _TTSOptions,
        conn_options: APIConnectOptions,
    ) -> None:
        """Bind the stream to its TTS instance, input text and options.

        Args:
            tts (TTS): Owning TTS instance; its Hume client is reused.
            input_text (str): Text to synthesize.
            opts (_TTSOptions): Options used to build the request.
            conn_options (APIConnectOptions): Connection options passed to the base class.
        """
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._opts = opts
        self._client = tts._client

    async def _run(self) -> None:
        """Request audio from Hume, decode it and emit the resulting frames.

        A background task feeds base64-decoded audio chunks into the decoder
        while this coroutine forwards decoded frames to the emitter.

        Raises:
            APITimeoutError: If an asyncio.TimeoutError occurs.
            APIConnectionError: If any other exception occurs.
        """
        request_id = utils.shortuuid()

        decoder = utils.codecs.AudioStreamDecoder(
            sample_rate=self._opts.sample_rate,
            num_channels=DEFAULT_NUM_CHANNELS,
        )

        decode_task: asyncio.Task | None = None

        try:

            async def _decode_loop() -> None:
                try:
                    async for chunk in self._client.tts.synthesize_json_streaming(
                        utterances=[
                            PostedUtterance(
                                text=self._input_text,
                                description=self._opts.utterance_options.description,
                                voice=self._opts.utterance_options.voice,
                                speed=self._opts.utterance_options.speed,
                                trailing_silence=self._opts.utterance_options.trailing_silence,
                            )
                        ],
                        context=self._opts.context,
                        format=self._opts.format,
                        num_generations=self._opts.num_generations,
                        split_utterances=self._opts.split_utterances,
                        instant_mode=self._opts.instant_mode,
                        strip_headers=self._opts.strip_headers,
                    ):
                        decoder.push(base64.b64decode(chunk.audio))

                finally:
                    # Always close the decoder input, even on failure, so the
                    # frame loop below is not left waiting for more data.
                    decoder.end_input()

            decode_task = asyncio.create_task(_decode_loop())
            emitter = tts.SynthesizedAudioEmitter(
                event_ch=self._event_ch,
                request_id=request_id,
            )
            async for frame in decoder:
                emitter.push(frame)

            emitter.flush()

            # Await the request task inside the try so that an exception raised
            # by the API call reaches the handlers below and is translated;
            # without this a failed request would only end the decoder input.
            await decode_task

        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except Exception as e:
            raise APIConnectionError() from e
        finally:
            if decode_task:
                await utils.aio.gracefully_cancel(decode_task)
            await decoder.aclose()
@@ -0,0 +1,15 @@
1
+ # Copyright 2024 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "1.0.17"
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-hume
3
+ Version: 1.0.17
4
+ Summary: Hume TTS plugin for LiveKit agents
5
+ Project-URL: Documentation, https://docs.livekit.io
6
+ Project-URL: Website, https://livekit.io/
7
+ Project-URL: Source, https://github.com/livekit/agents
8
+ Author-email: LiveKit <info@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: Hume,HumeAI,Octave,audio,livekit,realtime,webrtc
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3 :: Only
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Multimedia :: Sound/Audio
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.9.0
19
+ Requires-Dist: aiohttp>=3.8.0
20
+ Requires-Dist: hume
21
+ Requires-Dist: livekit-agents>=1.0.17
22
+ Description-Content-Type: text/markdown
23
+
24
+ # LiveKit Plugins Hume AI TTS
25
+
26
+ LiveKit Agents Framework plugin for [Hume](https://www.hume.ai/) Text-to-Speech API.
27
+
28
+ ## Installation
29
+
30
+ ```bash
31
+ pip install livekit-plugins-hume
32
+ ```
33
+
34
+ You will need an API key from Hume. Set it as the `HUME_API_KEY` environment variable. You can create a key on the [Hume platform settings page](https://platform.hume.ai/settings/keys).
@@ -0,0 +1,9 @@
1
+ livekit/plugins/hume/__init__.py,sha256=CdEjcQRVL3dBso4xBL-zOgCESSqwH0Xdb01VT35P8u0,1362
2
+ livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
3
+ livekit/plugins/hume/models.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
5
+ livekit/plugins/hume/tts.py,sha256=aVlp-PebRsIily2mcsCewuZzcgHKwzbBSYwHcFnSo0w,12029
6
+ livekit/plugins/hume/version.py,sha256=oT9vgJC1WR2E9D9qKy-VZ5neWTTotVE-IZcSbmiQP98,601
7
+ livekit_plugins_hume-1.0.17.dist-info/METADATA,sha256=EpRs_Biq7BWbNk8P-COP1Sgm0LqZiMd6L1Zp--oDsN8,1251
8
+ livekit_plugins_hume-1.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_hume-1.0.17.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any