livekit-plugins-hume 1.0.18__py3-none-any.whl → 1.0.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-hume might be problematic. Click here for more details.

@@ -20,9 +20,7 @@ __version__ = "1.0.0"
20
20
  from hume.tts import (
21
21
  Format,
22
22
  PostedContext,
23
- PostedUtterance,
24
- PostedUtteranceVoiceWithId,
25
- PostedUtteranceVoiceWithName,
23
+ PostedUtteranceVoice,
26
24
  )
27
25
  from livekit.agents import Plugin
28
26
 
@@ -32,10 +30,8 @@ from .tts import TTS
32
30
  __all__ = [
33
31
  "TTS",
34
32
  "Format",
35
- "PostedUtterance",
36
33
  "PostedContext",
37
- "PostedUtteranceVoiceWithName",
38
- "PostedUtteranceVoiceWithId",
34
+ "PostedUtteranceVoice",
39
35
  ]
40
36
 
41
37
 
@@ -22,7 +22,7 @@ from dataclasses import dataclass
22
22
  import aiohttp
23
23
 
24
24
  from hume import AsyncHumeClient
25
- from hume.tts import Format, FormatWav, PostedContext, PostedUtterance
25
+ from hume.tts import Format, FormatWav, PostedContext, PostedUtterance, PostedUtteranceVoice
26
26
  from livekit.agents import (
27
27
  APIConnectionError,
28
28
  APIConnectOptions,
@@ -39,25 +39,21 @@ from livekit.agents.types import (
39
39
  from livekit.agents.utils import is_given
40
40
 
41
41
  # Default audio settings
42
- DEFAULT_SAMPLE_RATE = 24000
42
+ DEFAULT_SAMPLE_RATE = 48000
43
43
  DEFAULT_NUM_CHANNELS = 1
44
44
 
45
- # Default TTS settings
46
- DEFAULT_UTTERANCE = PostedUtterance(text="")
47
-
48
45
 
49
46
  @dataclass
50
47
  class _TTSOptions:
51
48
  """TTS options for Hume API"""
52
49
 
53
50
  api_key: str
54
- utterance_options: PostedUtterance
51
+ voice: PostedUtteranceVoice | None
52
+ description: str | None
53
+ speed: float | None
55
54
  context: PostedContext | None
56
55
  format: Format
57
- sample_rate: int
58
- split_utterances: bool
59
56
  strip_headers: bool
60
- num_generations: int
61
57
  instant_mode: bool
62
58
  word_tokenizer: tokenize.WordTokenizer
63
59
 
@@ -66,35 +62,37 @@ class TTS(tts.TTS):
66
62
  def __init__(
67
63
  self,
68
64
  *,
69
- utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
65
+ voice: NotGivenOr[PostedUtteranceVoice] = NOT_GIVEN,
66
+ description: NotGivenOr[str] = NOT_GIVEN,
67
+ speed: NotGivenOr[float] = NOT_GIVEN,
70
68
  context: NotGivenOr[PostedContext] = NOT_GIVEN,
71
69
  format: NotGivenOr[Format] = NOT_GIVEN,
72
- split_utterances: bool = False,
73
- num_generations: int = 1,
74
70
  instant_mode: bool = False,
75
71
  strip_headers: bool = True,
76
72
  api_key: NotGivenOr[str] = NOT_GIVEN,
77
73
  word_tokenizer: tokenize.WordTokenizer | None = None,
78
74
  http_session: aiohttp.ClientSession | None = None,
79
- sample_rate: int = 24000,
80
75
  ) -> None:
81
76
  """Initialize the Hume TTS client.
82
77
 
83
78
  See https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming for API doc
84
79
 
85
80
  Args:
86
- utterance_options (NotGivenOr[PostedUtterance]): Default options for utterances,
87
- including description, voice, and delivery controls.
81
+ voice (NotGivenOr[PostedUtteranceVoice]): The voice, specified by name or id, to be
82
+ used. When no voice is specified, a novel voice will be generated based on the
83
+ text and optionally provided description.
84
+ description (NotGivenOr[str]): Natural language instructions describing how the
85
+ synthesized speech should sound, including but not limited to tone, intonation,
86
+ pacing, and accent. If a Voice is specified in the request, this description
87
+ serves as acting instructions. If no Voice is specified, a new voice is generated
88
+ based on this description.
89
+ speed (NotGivenOr[float]): Adjusts the relative speaking rate on a non-linear scale
90
+ from 0.25 (much slower) to 3.0 (much faster), where 1.0 represents normal speaking
91
+ pace.
88
92
  context (NotGivenOr[PostedContext]): Utterances to use as context for generating
89
93
  consistent speech style and prosody across multiple requests.
90
94
  format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
91
95
  Defaults to WAV format.
92
- split_utterances (bool): Controls how audio output is segmented in the response.
93
- When enabled (True), input utterances are split into natural-sounding segments.
94
- When disabled (False), maintains one-to-one mapping between input and output.
95
- Defaults to False.
96
- num_generations (int): Number of generations of the audio to produce.
97
- Must be between 1 and 5. Defaults to 1.
98
96
  instant_mode (bool): Enables ultra-low latency streaming, reducing time to first chunk.
99
97
  Recommended for real-time applications. Only for streaming endpoints.
100
98
  With this enabled, requests incur 10% higher cost. Defaults to False.
@@ -107,14 +105,13 @@ class TTS(tts.TTS):
107
105
  If None, a basic word tokenizer will be used.
108
106
  http_session (aiohttp.ClientSession | None): Optional HTTP session for API requests.
109
107
  If None, a new session will be created.
110
- sample_rate (int): Audio sample rate in Hz. Defaults to 24000.
111
108
  """
112
109
 
113
110
  super().__init__(
114
111
  capabilities=tts.TTSCapabilities(
115
112
  streaming=False,
116
113
  ),
117
- sample_rate=sample_rate,
114
+ sample_rate=DEFAULT_SAMPLE_RATE,
118
115
  num_channels=DEFAULT_NUM_CHANNELS,
119
116
  )
120
117
 
@@ -128,15 +125,12 @@ class TTS(tts.TTS):
128
125
  word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)
129
126
 
130
127
  self._opts = _TTSOptions(
131
- utterance_options=utterance_options
132
- if is_given(utterance_options)
133
- else DEFAULT_UTTERANCE,
128
+ voice=voice if is_given(voice) else None,
129
+ description=description if is_given(description) else None,
130
+ speed=speed if is_given(speed) else None,
134
131
  context=context if is_given(context) else None,
135
132
  format=format if is_given(format) else FormatWav(),
136
133
  api_key=self._api_key,
137
- sample_rate=self.sample_rate,
138
- split_utterances=split_utterances,
139
- num_generations=num_generations,
140
134
  strip_headers=strip_headers,
141
135
  instant_mode=instant_mode,
142
136
  word_tokenizer=word_tokenizer,
@@ -153,26 +147,31 @@ class TTS(tts.TTS):
153
147
  def update_options(
154
148
  self,
155
149
  *,
156
- utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
150
+ voice: NotGivenOr[PostedUtteranceVoice] = NOT_GIVEN,
151
+ description: NotGivenOr[str] = NOT_GIVEN,
152
+ speed: NotGivenOr[float] = NOT_GIVEN,
157
153
  context: NotGivenOr[PostedContext] = NOT_GIVEN,
158
154
  format: NotGivenOr[Format] = NOT_GIVEN,
159
- split_utterances: NotGivenOr[bool] = NOT_GIVEN,
160
- num_generations: NotGivenOr[int] = NOT_GIVEN,
161
155
  instant_mode: NotGivenOr[bool] = NOT_GIVEN,
162
156
  strip_headers: NotGivenOr[bool] = NOT_GIVEN,
163
157
  ) -> None:
164
158
  """Update TTS options for synthesizing speech.
165
159
 
166
160
  Args:
167
- utterance_options (NotGivenOr[PostedUtterance]): Options for utterances,
168
- including text, description, voice, and additional controls.
161
+ voice (NotGivenOr[PostedUtteranceVoice]): The voice, specified by name or id, to be
162
+ used. When no voice is specified, a novel voice will be generated based on the
163
+ text and optionally provided description.
164
+ description (NotGivenOr[str]): Natural language instructions describing how the
165
+ synthesized speech should sound, including but not limited to tone, intonation,
166
+ pacing, and accent. If a Voice is specified in the request, this description
167
+ serves as acting instructions. If no Voice is specified, a new voice is generated
168
+ based on this description.
169
+ speed (NotGivenOr[float]): Adjusts the relative speaking rate on a non-linear scale
170
+ from 0.25 (much slower) to 3.0 (much faster), where 1.0 represents normal speaking
171
+ pace.
169
172
  context (NotGivenOr[PostedContext]): Utterances to use as context for generating
170
173
  consistent speech style and prosody across multiple requests.
171
174
  format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
172
- split_utterances (NotGivenOr[bool]): Controls how audio output is segmented.
173
- When True, utterances are split into natural-sounding segments.
174
- When False, maintains one-to-one mapping between input and output.
175
- num_generations (NotGivenOr[int]): Number of speech generations to produce (1-5).
176
175
  instant_mode (NotGivenOr[bool]): Enables ultra-low latency streaming.
177
176
  Reduces time to first audio chunk, recommended for real-time applications.
178
177
  Note: Incurs 10% higher cost when enabled.
@@ -181,16 +180,16 @@ class TTS(tts.TTS):
181
180
  If disabled, each chunk’s audio will be its own audio file, each with its headers.
182
181
  """
183
182
 
184
- if is_given(utterance_options):
185
- self._opts.utterance_options = utterance_options
183
+ if is_given(voice):
184
+ self._opts.voice = voice
185
+ if is_given(description):
186
+ self._opts.description = description
187
+ if is_given(speed):
188
+ self._opts.speed = speed
186
189
  if is_given(format):
187
190
  self._opts.format = format
188
191
  if is_given(context):
189
192
  self._opts.context = context
190
- if is_given(split_utterances):
191
- self._opts.split_utterances = split_utterances
192
- if is_given(num_generations):
193
- self._opts.num_generations = num_generations
194
193
  if is_given(instant_mode):
195
194
  self._opts.instant_mode = instant_mode
196
195
  if is_given(strip_headers):
@@ -229,7 +228,7 @@ class ChunkedStream(tts.ChunkedStream):
229
228
  request_id = utils.shortuuid()
230
229
 
231
230
  decoder = utils.codecs.AudioStreamDecoder(
232
- sample_rate=self._opts.sample_rate,
231
+ sample_rate=DEFAULT_SAMPLE_RATE,
233
232
  num_channels=DEFAULT_NUM_CHANNELS,
234
233
  )
235
234
 
@@ -238,22 +237,24 @@ class ChunkedStream(tts.ChunkedStream):
238
237
  try:
239
238
 
240
239
  async def _decode_loop():
240
+ utterance_options = {
241
+ "voice": self._opts.voice,
242
+ "description": self._opts.description,
243
+ "speed": self._opts.speed,
244
+ }
245
+
246
+ utterance_kwargs = {
247
+ "text": self._input_text,
248
+ **{k: v for k, v in utterance_options.items() if v is not None},
249
+ }
250
+
241
251
  try:
252
+ utterance = PostedUtterance(**utterance_kwargs)
253
+
242
254
  async for chunk in self._client.tts.synthesize_json_streaming(
243
- utterances=[
244
- PostedUtterance(
245
- text=self._input_text,
246
- **{
247
- k: v
248
- for k, v in self._opts.utterance_options.__dict__.items()
249
- if v is not None and k != "text"
250
- },
251
- )
252
- ],
255
+ utterances=[utterance],
253
256
  context=self._opts.context,
254
257
  format=self._opts.format,
255
- num_generations=self._opts.num_generations,
256
- split_utterances=self._opts.split_utterances,
257
258
  instant_mode=self._opts.instant_mode,
258
259
  strip_headers=self._opts.strip_headers,
259
260
  ):
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.0.18"
15
+ __version__ = "1.0.19"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-hume
3
- Version: 1.0.18
3
+ Version: 1.0.19
4
4
  Summary: Hume TTS plugin for LiveKit agents
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -18,7 +18,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
18
  Requires-Python: >=3.9.0
19
19
  Requires-Dist: aiohttp>=3.8.0
20
20
  Requires-Dist: hume>=0.8.3
21
- Requires-Dist: livekit-agents>=1.0.18
21
+ Requires-Dist: livekit-agents>=1.0.19
22
22
  Description-Content-Type: text/markdown
23
23
 
24
24
  # LiveKit Plugins Hume AI TTS
@@ -0,0 +1,9 @@
1
+ livekit/plugins/hume/__init__.py,sha256=3pdWGwUA2DBleYqmHXrlicBv4QdBFlnMoNAQP8A5X-A,1236
2
+ livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
3
+ livekit/plugins/hume/models.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
5
+ livekit/plugins/hume/tts.py,sha256=JGUDNeVzPttrErCSP783bayByGBZraAHAZf23RgEz9A,11251
6
+ livekit/plugins/hume/version.py,sha256=KWKI0i88RsDNfgXXfCYo8ZHeBTwbIaCLBliemupGV-g,601
7
+ livekit_plugins_hume-1.0.19.dist-info/METADATA,sha256=Zub7XIe9XmmtnvOZajc7Q65QhB5J4A9zRJLFko2pWVE,1258
8
+ livekit_plugins_hume-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ livekit_plugins_hume-1.0.19.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- livekit/plugins/hume/__init__.py,sha256=CdEjcQRVL3dBso4xBL-zOgCESSqwH0Xdb01VT35P8u0,1362
2
- livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
3
- livekit/plugins/hume/models.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
5
- livekit/plugins/hume/tts.py,sha256=X6AQyrHYHw3_yMu_L2714IbbpTUWJiTuIAn_y5xa8Xk,11032
6
- livekit/plugins/hume/version.py,sha256=lZIiOQF6Av8ptH13BtppSBfb58nO8XlwJyzWslLTnKs,601
7
- livekit_plugins_hume-1.0.18.dist-info/METADATA,sha256=VEeLAiSyZIGehiZp5ZinYHvQj9_wRkWp0PZWFp83FJM,1258
8
- livekit_plugins_hume-1.0.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- livekit_plugins_hume-1.0.18.dist-info/RECORD,,