livekit-plugins-hume 1.2.13__tar.gz → 1.2.15__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of livekit-plugins-hume might be problematic. Click here for more details.

@@ -172,4 +172,8 @@ pyrightconfig.json
172
172
  docs/
173
173
 
174
174
  # Database files
175
- *.db
175
+ *.db
176
+
177
+
178
+ # Examples for development
179
+ examples/dev/*
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: livekit-plugins-hume
3
- Version: 1.2.13
3
+ Version: 1.2.15
4
4
  Summary: Hume TTS plugin for LiveKit agents
5
5
  Project-URL: Documentation, https://docs.livekit.io
6
6
  Project-URL: Website, https://livekit.io/
@@ -17,7 +17,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
17
17
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
18
  Requires-Python: >=3.9.0
19
19
  Requires-Dist: aiohttp>=3.8.0
20
- Requires-Dist: livekit-agents>=1.2.13
20
+ Requires-Dist: livekit-agents>=1.2.15
21
21
  Description-Content-Type: text/markdown
22
22
 
23
23
  # Hume AI TTS plugin for LiveKit Agents
@@ -34,6 +34,7 @@ from .version import __version__
34
34
  __all__ = [
35
35
  "TTS",
36
36
  "AudioFormat",
37
+ "ModelVersion",
37
38
  "VoiceById",
38
39
  "VoiceByName",
39
40
  "VoiceProvider",
@@ -20,7 +20,7 @@ import json
20
20
  import os
21
21
  from dataclasses import dataclass, replace
22
22
  from enum import Enum
23
- from typing import Any, TypedDict
23
+ from typing import Any, Literal, TypedDict
24
24
 
25
25
  import aiohttp
26
26
 
@@ -73,6 +73,9 @@ class AudioFormat(str, Enum):
73
73
  pcm = "pcm"
74
74
 
75
75
 
76
+ ModelVersion = Literal["1", "2"]
77
+
78
+
76
79
  DEFAULT_HEADERS = {
77
80
  "X-Hume-Client-Name": "livekit",
78
81
  "X-Hume-Client-Version": __version__,
@@ -89,6 +92,7 @@ class _TTSOptions:
89
92
  api_key: str
90
93
  base_url: str
91
94
  voice: VoiceById | VoiceByName | None
95
+ model_version: ModelVersion | None
92
96
  description: str | None
93
97
  speed: float | None
94
98
  trailing_silence: float | None
@@ -106,6 +110,7 @@ class TTS(tts.TTS):
106
110
  *,
107
111
  api_key: str | None = None,
108
112
  voice: VoiceById | VoiceByName | None = DEFAULT_VOICE,
113
+ model_version: ModelVersion | None = "1",
109
114
  description: str | None = None,
110
115
  speed: float | None = None,
111
116
  trailing_silence: float | None = None,
@@ -121,10 +126,12 @@ class TTS(tts.TTS):
121
126
  Args:
122
127
  api_key: Hume AI API key. If not provided, will look for HUME_API_KEY environment
123
128
  variable.
124
- voice: A voice from the voice library specifed by name or id.
129
+ voice: A voice from the voice library specified by name or id.
130
+ model_version: Specifies which version of Octave to use. See Hume's documentation for
131
+ details on model version differences: https://dev.hume.ai/docs/text-to-speech-tts/overview.
125
132
  description: Natural language instructions describing how the synthesized speech
126
133
  should sound (≤1000 characters).
127
- speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
134
+ speed: Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
128
135
  trailing_silence: Duration of trailing silence (in seconds) to add to each utterance
129
136
  (≥0, ≤5.0, default: 0.35).
130
137
  context: Optional context for synthesis, either as text or list of utterances.
@@ -157,6 +164,7 @@ class TTS(tts.TTS):
157
164
  self._opts = _TTSOptions(
158
165
  api_key=key,
159
166
  voice=voice,
167
+ model_version=model_version,
160
168
  description=description,
161
169
  speed=speed,
162
170
  trailing_silence=trailing_silence,
@@ -195,10 +203,10 @@ class TTS(tts.TTS):
195
203
  """Update TTS options used for all future synthesis (until updated again)
196
204
 
197
205
  Args:
198
- voice: A voice from the voice library specifed by name or id.
206
+ voice: A voice from the voice library specified by name or id.
199
207
  description: Natural language instructions describing how the synthesized speech
200
208
  should sound (≤1000 characters).
201
- speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
209
+ speed: Speed multiplier for the synthesized speech (≥0.5, ≤2.0, default: 1.0).
202
210
  trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
203
211
  context: Optional context for synthesis, either as text or list of utterances.
204
212
  instant_mode: Whether to use instant mode.
@@ -247,6 +255,7 @@ class ChunkedStream(tts.ChunkedStream):
247
255
 
248
256
  payload: dict[str, Any] = {
249
257
  "utterances": [utterance],
258
+ "version": self._opts.model_version,
250
259
  "strip_headers": True,
251
260
  "instant_mode": self._opts.instant_mode,
252
261
  "format": {"type": self._opts.audio_format.value},
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.2.13"
15
+ __version__ = "1.2.15"
@@ -24,7 +24,7 @@ classifiers = [
24
24
  ]
25
25
  dependencies = [
26
26
  "aiohttp>=3.8.0",
27
- "livekit-agents>=1.2.13",
27
+ "livekit-agents>=1.2.15",
28
28
  ]
29
29
 
30
30
  [project.urls]