PyPI - livekit-plugins-hume - Versions diffs - 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl - Mend

livekit-plugins-hume 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-hume might be problematic. Click here for more details.

Files changed (7) hide show

livekit/plugins/hume/__init__.py CHANGED Viewed

@@ -21,10 +21,24 @@ from __future__ import annotations
 from livekit.agents import Plugin
-from .tts import TTS, PostedContext, PostedUtterance
+from .tts import (
+    TTS,
+    AudioFormat,
+    Utterance,
+    VoiceById,
+    VoiceByName,
+    VoiceProvider,
+)
 from .version import __version__
-__all__ = ["TTS", "PostedContext", "PostedUtterance"]
+__all__ = [
+    "TTS",
+    "AudioFormat",
+    "VoiceById",
+    "VoiceByName",
+    "VoiceProvider",
+    "Utterance",
+]
 class HumeAIPlugin(Plugin):

livekit/plugins/hume/tts.py CHANGED Viewed

@@ -19,6 +19,7 @@ import base64
 import json
 import os
 from dataclasses import dataclass, replace
+from enum import Enum
 from typing import Any, TypedDict
 import aiohttp
@@ -27,32 +28,66 @@ from livekit.agents import APIConnectionError, APIConnectOptions, APITimeoutErro
 from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
 from livekit.agents.utils import is_given
-API_AUTH_HEADER = "X-Hume-Api-Key"
-STREAM_PATH = "/v0/tts/stream/json"
-DEFAULT_BASE_URL = "https://api.hume.ai"
+from .version import __version__
+class VoiceById(TypedDict, total=False):
+    id: str
+    provider: VoiceProvider | None
+class VoiceByName(TypedDict, total=False):
+    name: str
+    provider: VoiceProvider | None
-class PostedUtterance(TypedDict, total=False):
+class Utterance(TypedDict, total=False):
+    """Utterance for TTS synthesis."""
     text: str
-    description: str
-    voice: dict[str, Any]
-    speed: float
-    trailing_silence: float
+    description: str | None
+    speed: float | None
+    voice: VoiceById | VoiceByName | None
+    trailing_silence: float | None
+class VoiceProvider(str, Enum):
+    """Voice provider for the voice library."""
+    hume = "HUME_AI"
+    custom = "CUSTOM_VOICE"
+class AudioFormat(str, Enum):
+    """Audio format for the synthesized speech."""
+    mp3 = "mp3"
+    wav = "wav"
+    pcm = "pcm"
-class PostedContext(TypedDict, total=False):
-    utterances: list[PostedUtterance]
+DEFAULT_HEADERS = {
+    "X-Hume-Client-Name": "livekit",
+    "X-Hume-Client-Version": __version__,
+}
+API_AUTH_HEADER = "X-Hume-Api-Key"
+STREAM_PATH = "/v0/tts/stream/json"
+DEFAULT_BASE_URL = "https://api.hume.ai"
+SUPPORTED_SAMPLE_RATE = 48000
+DEFAULT_VOICE = VoiceByName(name="Male English Actor", provider=VoiceProvider.hume)
 @dataclass
 class _TTSOptions:
     api_key: str
-    utterance_options: PostedUtterance
-    context: PostedContext | None
-    sample_rate: int
-    split_utterances: bool
-    instant_mode: bool
     base_url: str
+    voice: VoiceById | VoiceByName | None
+    description: str | None
+    speed: float | None
+    trailing_silence: float | None
+    context: str | list[Utterance] | None
+    instant_mode: bool | None
+    audio_format: AudioFormat
     def http_url(self, path: str) -> str:
         return f"{self.base_url}{path}"
@@ -63,36 +98,64 @@ class TTS(tts.TTS):
         self,
         *,
         api_key: str | None = None,
-        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
-        split_utterances: bool = True,
-        instant_mode: bool = True,
-        sample_rate: int = 24000,
+        voice: VoiceById | VoiceByName | None = DEFAULT_VOICE,
+        description: str | None = None,
+        speed: float | None = None,
+        trailing_silence: float | None = None,
+        context: str | list[Utterance] | None = None,
+        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
+        audio_format: AudioFormat = AudioFormat.mp3,
         base_url: str = DEFAULT_BASE_URL,
         http_session: aiohttp.ClientSession | None = None,
     ):
+        """Initialize the Hume AI TTS client. Options will be used for all future synthesis
+        (until updated with update_options).
+        Args:
+            api_key: Hume AI API key. If not provided, will look for HUME_API_KEY environment
+                variable.
+            voice: A voice from the voice library specifed by name or id.
+            description: Natural language instructions describing how the synthesized speech
+                should sound (≤1000 characters).
+            speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
+            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance
+                (≥0, ≤5.0, default: 0.35).
+            context: Optional context for synthesis, either as text or list of utterances.
+            instant_mode: Whether to use instant mode. Defaults to True if voice specified,
+                False otherwise. Requires a voice to be specified when enabled.
+            audio_format: Output audio format (mp3, wav, or pcm). Defaults to mp3.
+            base_url: Base URL for Hume AI API. Defaults to https://api.hume.ai
+            http_session: Optional aiohttp ClientSession to use for requests.
+        """
         super().__init__(
-            capabilities=tts.TTSCapabilities(streaming=True),
-            sample_rate=sample_rate,
+            capabilities=tts.TTSCapabilities(streaming=False),
+            sample_rate=SUPPORTED_SAMPLE_RATE,
             num_channels=1,
         )
         key = api_key or os.environ.get("HUME_API_KEY")
         if not key:
             raise ValueError("Hume API key is required via api_key or HUME_API_KEY env var")
-        default_utterance: PostedUtterance = {
-            "speed": 1.0,
-            "trailing_silence": 0.35,
-        }
-        if is_given(utterance_options):
-            default_utterance.update(utterance_options)
+        has_voice = voice is not None
+        # Default instant_mode is True if a voice is specified, otherwise False
+        # (Hume API requires a voice for instant mode)
+        if not is_given(instant_mode):
+            resolved_instant_mode = has_voice
+        elif instant_mode and not has_voice:
+            raise ValueError("Hume TTS: instant_mode cannot be enabled without specifying a voice")
+        else:
+            resolved_instant_mode = instant_mode
         self._opts = _TTSOptions(
             api_key=key,
-            utterance_options=default_utterance,
-            context=None,
-            sample_rate=sample_rate,
-            split_utterances=split_utterances,
-            instant_mode=instant_mode,
+            voice=voice,
+            description=description,
+            speed=speed,
+            trailing_silence=trailing_silence,
+            context=context,
+            instant_mode=resolved_instant_mode,
+            audio_format=audio_format,
             base_url=base_url,
         )
         self._session = http_session
@@ -106,19 +169,40 @@ class TTS(tts.TTS):
     def update_options(
         self,
         *,
-        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
-        context: NotGivenOr[PostedContext] = NOT_GIVEN,
-        split_utterances: NotGivenOr[bool] = NOT_GIVEN,
+        description: NotGivenOr[str | None] = NOT_GIVEN,
+        speed: NotGivenOr[float | None] = NOT_GIVEN,
+        voice: NotGivenOr[VoiceById | VoiceByName | None] = NOT_GIVEN,
+        trailing_silence: NotGivenOr[float | None] = NOT_GIVEN,
+        context: NotGivenOr[str | list[Utterance] | None] = NOT_GIVEN,
         instant_mode: NotGivenOr[bool] = NOT_GIVEN,
+        audio_format: NotGivenOr[AudioFormat] = NOT_GIVEN,
     ) -> None:
-        if is_given(utterance_options):
-            self._opts.utterance_options = utterance_options
-        if is_given(context):  #
-            self._opts.context = context
-        if is_given(split_utterances):
-            self._opts.split_utterances = split_utterances
+        """Update TTS options used for all future synthesis (until updated again)
+        Args:
+            voice: A voice from the voice library specifed by name or id.
+            description: Natural language instructions describing how the synthesized speech
+                should sound (≤1000 characters).
+            speed: Speed multiplier for the synthesized speech (≥0.25, ≤3.0, default: 1.0).
+            trailing_silence: Duration of trailing silence (in seconds) to add to each utterance.
+            context: Optional context for synthesis, either as text or list of utterances.
+            instant_mode: Whether to use instant mode.
+            audio_format: Output audio format (mp3, wav, or pcm).
+        """
+        if is_given(description):
+            self._opts.description = description
+        if is_given(speed):
+            self._opts.speed = speed
+        if is_given(voice):
+            self._opts.voice = voice  # type: ignore
+        if is_given(trailing_silence):
+            self._opts.trailing_silence = trailing_silence
+        if is_given(context):
+            self._opts.context = context  # type: ignore
         if is_given(instant_mode):
             self._opts.instant_mode = instant_mode
+        if is_given(audio_format):
+            self._opts.audio_format = audio_format
     def synthesize(
         self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
@@ -133,34 +217,46 @@ class ChunkedStream(tts.ChunkedStream):
         self._opts = replace(tts._opts)
     async def _run(self, output_emitter: tts.AudioEmitter) -> None:
-        utterance: PostedUtterance = {"text": self._input_text}
-        utterance.update(self._opts.utterance_options)
+        utterance: Utterance = {
+            "text": self._input_text,
+        }
+        if self._opts.voice:
+            utterance["voice"] = self._opts.voice
+        if self._opts.description:
+            utterance["description"] = self._opts.description
+        if self._opts.speed:
+            utterance["speed"] = self._opts.speed
+        if self._opts.trailing_silence:
+            utterance["trailing_silence"] = self._opts.trailing_silence
         payload: dict[str, Any] = {
             "utterances": [utterance],
-            "split_utterances": self._opts.split_utterances,
             "strip_headers": True,
             "instant_mode": self._opts.instant_mode,
-            "format": {"type": "mp3"},
+            "format": {"type": self._opts.audio_format.value},
         }
-        if self._opts.context:
-            payload["context"] = self._opts.context
+        if isinstance(self._opts.context, str):
+            payload["context"] = {"generation_id": self._opts.context}
+        elif isinstance(self._opts.context, list):
+            payload["context"] = {"utterances": self._opts.context}
         try:
             async with self._tts._ensure_session().post(
                 self._opts.http_url(STREAM_PATH),
-                headers={API_AUTH_HEADER: self._opts.api_key},
+                headers={**DEFAULT_HEADERS, API_AUTH_HEADER: self._opts.api_key},
                 json=payload,
                 timeout=aiohttp.ClientTimeout(total=None, sock_connect=self._conn_options.timeout),
                 # large read_bufsize to avoid `ValueError: Chunk too big`
                 read_bufsize=10 * 1024 * 1024,
             ) as resp:
                 resp.raise_for_status()
                 output_emitter.initialize(
                     request_id=utils.shortuuid(),
-                    sample_rate=self._opts.sample_rate,
+                    sample_rate=SUPPORTED_SAMPLE_RATE,
                     num_channels=self._tts.num_channels,
-                    mime_type="audio/mp3",
+                    mime_type=f"audio/{self._opts.audio_format.value}",
                 )
                 async for raw_line in resp.content:
@@ -174,6 +270,7 @@ class ChunkedStream(tts.ChunkedStream):
                         output_emitter.push(base64.b64decode(audio_b64))
                 output_emitter.flush()
         except asyncio.TimeoutError:
             raise APITimeoutError() from None
         except Exception as e:

livekit/plugins/hume/version.py CHANGED Viewed

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.1.1"
+__version__ = "1.1.3"

{livekit_plugins_hume-1.1.1.dist-info → livekit_plugins_hume-1.1.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: livekit-plugins-hume
-Version: 1.1.1
+Version: 1.1.3
 Summary: Hume TTS plugin for LiveKit agents
 Project-URL: Documentation, https://docs.livekit.io
 Project-URL: Website, https://livekit.io/
@@ -17,7 +17,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0
 Requires-Dist: aiohttp>=3.8.0
-Requires-Dist: livekit-agents>=1.1.1
+Requires-Dist: livekit-agents>=1.1.3
 Description-Content-Type: text/markdown
 # Hume AI TTS plugin for LiveKit Agents

livekit_plugins_hume-1.1.3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+livekit/plugins/hume/__init__.py,sha256=yYTwSJaYq5ufZ_EnoSuLa2FfSsnOZu-swAzYjNQAhhw,1374
+livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
+livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
+livekit/plugins/hume/tts.py,sha256=VYduFRxndfE0R-3A_Pt16pvcLd80VWnUJIda4iQBgPo,10301
+livekit/plugins/hume/version.py,sha256=_-4Ui7Aa9dmOTog-I15Ct4mtOs5t7T2_Bi2bMdIRvcE,600
+livekit_plugins_hume-1.1.3.dist-info/METADATA,sha256=mva7Jg6oH8gcaSLpz_gPo1aWY06gg99THwDhi2X5QpU,1354
+livekit_plugins_hume-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_hume-1.1.3.dist-info/RECORD,,

livekit_plugins_hume-1.1.1.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-livekit/plugins/hume/__init__.py,sha256=--F5e6CdoZM8eyw5ca-H-khoKdDJxdflwvrMCSwAHws,1250
-livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
-livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
-livekit/plugins/hume/tts.py,sha256=ZnVqxzzs75OpHe_YDMr5X_BgZlZRlQSiCYs0z1Yq5gg,6128
-livekit/plugins/hume/version.py,sha256=NTkUKR1fwMpJvRho7A_ZH0gQcK_2G7aizsjhjTXvZf0,600
-livekit_plugins_hume-1.1.1.dist-info/METADATA,sha256=q4oiVeukrGm3GXaPrcjekgQ2j73uacW1WYfI2DcjXXo,1354
-livekit_plugins_hume-1.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-livekit_plugins_hume-1.1.1.dist-info/RECORD,,

{livekit_plugins_hume-1.1.1.dist-info → livekit_plugins_hume-1.1.3.dist-info}/WHEEL RENAMED Viewed

File without changes

livekit-plugins-hume 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl

Potentially problematic release.

livekit-plugins-hume 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl