PyPI - livekit-plugins-hume - Versions diffs - 1.0.17__py3-none-any.whl - Mend

livekit-plugins-hume 1.0.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of livekit-plugins-hume might be problematic. Click here for more details.

Files changed (9) hide show

livekit/plugins/hume/__init__.py +56 -0
livekit/plugins/hume/log.py +3 -0
livekit/plugins/hume/models.py +0 -0
livekit/plugins/hume/py.typed +1 -0
livekit/plugins/hume/tts.py +297 -0
livekit/plugins/hume/version.py +15 -0
livekit_plugins_hume-1.0.17.dist-info/METADATA +34 -0
livekit_plugins_hume-1.0.17.dist-info/RECORD +9 -0
livekit_plugins_hume-1.0.17.dist-info/WHEEL +4 -0

livekit/plugins/hume/__init__.py ADDED Viewed

@@ -0,0 +1,56 @@
+# Copyright 2023 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+__version__ = "1.0.0"
+# make imports available
+from hume.tts import (
+    Format,
+    PostedContext,
+    PostedUtterance,
+    PostedUtteranceVoiceWithId,
+    PostedUtteranceVoiceWithName,
+)
+from livekit.agents import Plugin
+from .tts import TTS
+# all exports
+__all__ = [
+    "TTS",
+    "Format",
+    "PostedUtterance",
+    "PostedContext",
+    "PostedUtteranceVoiceWithName",
+    "PostedUtteranceVoiceWithId",
+]
+class HumeAIPlugin(Plugin):
+    def __init__(self) -> None:
+        super().__init__(__name__, __version__, __package__)
+Plugin.register_plugin(HumeAIPlugin())
+# Cleanup docs of unexported modules: every module-level name that is not
+# listed in __all__ is mapped to False in __pdoc__.
+# NOTE(review): presumably pdoc reads __pdoc__ to omit these names from the
+# generated documentation - confirm against the pdoc version in use.
+_module = dir()  # names defined in this module up to this point
+NOT_IN_ALL = [m for m in _module if m not in __all__]
+__pdoc__ = {}
+for n in NOT_IN_ALL:
+    __pdoc__[n] = False

livekit/plugins/hume/log.py ADDED Viewed

@@ -0,0 +1,3 @@
+import logging
+# Module-level logger registered under the name "livekit.plugins.hume".
+logger = logging.getLogger("livekit.plugins.hume")

livekit/plugins/hume/models.py ADDED Viewed

File without changes

livekit/plugins/hume/py.typed ADDED Viewed

	@@ -0,0 +1 @@
1	+

livekit/plugins/hume/tts.py ADDED Viewed

@@ -0,0 +1,297 @@
+# Copyright 2023 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+import asyncio
+import base64
+import os
+from dataclasses import dataclass
+import aiohttp
+from hume import AsyncHumeClient
+from hume.tts import Format, FormatWav, PostedContext, PostedUtterance, PostedUtteranceVoiceWithName
+from livekit.agents import (
+    APIConnectionError,
+    APIConnectOptions,
+    APITimeoutError,
+    tokenize,
+    tts,
+    utils,
+)
+from livekit.agents.types import (
+    DEFAULT_API_CONNECT_OPTIONS,
+    NOT_GIVEN,
+    NotGivenOr,
+)
+from livekit.agents.utils import is_given
+# Default audio settings
+DEFAULT_SAMPLE_RATE = 24000
+DEFAULT_NUM_CHANNELS = 1
+# Default TTS settings
+DEFAULT_VOICE = PostedUtteranceVoiceWithName(name="Colton Rivers", provider="HUME_AI")
+# text is required in PostedUtterance but it is declared as an empty string
+# it will be overwritten when input tokens are received
+DEFAULT_UTTERANCE = PostedUtterance(
+    voice=DEFAULT_VOICE, speed=1, trailing_silence=0.35, description="", text=""
+)
+@dataclass
+class _TTSOptions:
+    """TTS options for Hume API.
+    Built once in ``TTS.__init__`` and mutated in place by
+    ``TTS.update_options``; ``ChunkedStream`` reads it when issuing a request.
+    """
+    # Hume API key (argument or HUME_API_KEY environment variable).
+    api_key: str
+    # Template utterance: description, voice, speed and trailing silence are
+    # copied into each request; its ``text`` is a placeholder.
+    utterance_options: PostedUtterance
+    # Optional context forwarded to the API; None when not supplied.
+    context: PostedContext | None
+    # Output audio format forwarded to the API.
+    format: Format
+    # Sample rate in Hz, handed to the audio decoder.
+    sample_rate: int
+    # The four flags below are forwarded unchanged to synthesize_json_streaming.
+    split_utterances: bool
+    strip_headers: bool
+    num_generations: int
+    instant_mode: bool
+    # Tokenizer stored on the options; not referenced by the synthesis code
+    # visible in this file.
+    word_tokenizer: tokenize.WordTokenizer
+class TTS(tts.TTS):
+    def __init__(
+        self,
+        *,
+        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
+        context: NotGivenOr[PostedContext] = NOT_GIVEN,
+        format: NotGivenOr[Format] = NOT_GIVEN,
+        split_utterances: bool = False,
+        num_generations: int = 1,
+        instant_mode: bool = False,
+        strip_headers: bool = True,
+        api_key: NotGivenOr[str] = NOT_GIVEN,
+        word_tokenizer: tokenize.WordTokenizer | None = None,
+        http_session: aiohttp.ClientSession | None = None,
+        sample_rate: int = 24000,
+    ) -> None:
+        """Initialize the Hume TTS client.
+        See https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming for API doc
+        Args:
+            utterance_options (NotGivenOr[PostedUtterance]): Default options for utterances,
+                including description, voice, and delivery controls.
+            context (NotGivenOr[PostedContext]): Utterances to use as context for generating
+                consistent speech style and prosody across multiple requests.
+            format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
+                Defaults to WAV format.
+            split_utterances (bool): Controls how audio output is segmented in the response.
+                When enabled (True), input utterances are split into natural-sounding segments.
+                When disabled (False), maintains one-to-one mapping between input and output.
+                Defaults to False.
+            num_generations (int): Number of generations of the audio to produce.
+                Must be between 1 and 5. Defaults to 1.
+            instant_mode (bool): Enables ultra-low latency streaming, reducing time to first chunk.
+                Recommended for real-time applications. Only for streaming endpoints.
+                With this enabled, requests incur 10% higher cost. Defaults to False.
+            strip_headers (bool): If enabled, the audio for all the chunks of a generation.
+                Once concatenated together, will constitute a single audio file.
+                If disabled, each chunk’s audio will be its own audio file, each with its headers.
+            api_key (NotGivenOr[str]): Hume API key for authentication. If not provided,
+                will attempt to read from HUME_API_KEY environment variable.
+            word_tokenizer (tokenize.WordTokenizer | None): Custom word tokenizer to use for text.
+                If None, a basic word tokenizer will be used.
+            http_session (aiohttp.ClientSession | None): Optional HTTP session for API requests.
+                If None, a new session will be created.
+            sample_rate (int): Audio sample rate in Hz. Defaults to 24000.
+        """
+        super().__init__(
+            capabilities=tts.TTSCapabilities(
+                streaming=False,
+            ),
+            sample_rate=sample_rate,
+            num_channels=DEFAULT_NUM_CHANNELS,
+        )
+        self._api_key = api_key if is_given(api_key) else os.environ.get("HUME_API_KEY")
+        if not self._api_key:
+            raise ValueError(
+                "Hume API key is required, either as argument or set HUME_API_KEY env variable"
+            )
+        if not word_tokenizer:
+            word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)
+        self._opts = _TTSOptions(
+            utterance_options=utterance_options
+            if is_given(utterance_options)
+            else DEFAULT_UTTERANCE,
+            context=context if is_given(context) else None,
+            format=format if is_given(format) else FormatWav(),
+            api_key=self._api_key,
+            sample_rate=self.sample_rate,
+            split_utterances=split_utterances,
+            num_generations=num_generations,
+            strip_headers=strip_headers,
+            instant_mode=instant_mode,
+            word_tokenizer=word_tokenizer,
+        )
+        self._client = AsyncHumeClient(api_key=self._api_key)
+        self._session = http_session
+    def _ensure_session(self) -> aiohttp.ClientSession:
+        if not self._session:
+            self._session = utils.http_context.http_session()
+        return self._session
+    def update_options(
+        self,
+        *,
+        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
+        context: NotGivenOr[PostedContext] = NOT_GIVEN,
+        format: NotGivenOr[Format] = NOT_GIVEN,
+        split_utterances: NotGivenOr[bool] = NOT_GIVEN,
+        num_generations: NotGivenOr[int] = NOT_GIVEN,
+        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
+        strip_headers: NotGivenOr[bool] = NOT_GIVEN,
+    ) -> None:
+        """Update TTS options for synthesizing speech.
+        Args:
+            utterance_options (NotGivenOr[PostedUtterance]): Options for utterances,
+                including text, description, voice, and additional controls.
+            context (Optional[PostedContext]): Utterances to use as context for generating
+                consistent speech style and prosody across multiple requests.
+            format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
+            split_utterances (NotGivenOr[bool]): Controls how audio output is segmented.
+                When True, utterances are split into natural-sounding segments.
+                When False, maintains one-to-one mapping between input and output.
+            num_generations (NotGivenOr[int]): Number of speech generations to produce (1-5).
+            instant_mode (NotGivenOr[bool]): Enables ultra-low latency streaming.
+                Reduces time to first audio chunk, recommended for real-time applications.
+                Note: Incurs 10% higher cost when enabled.
+            strip_headers (NotGivenOr[bool]): If enabled, the audio for the chunks of a generation.
+                Once concatenated together, will constitute a single audio file.
+                If disabled, each chunk’s audio will be its own audio file, each with its headers.
+        """
+        if is_given(utterance_options):
+            # text is required in PostedUtterance but it is declared as an empty string
+            # it will be overwritten when input tokens are received
+            self._opts.utterance_options = PostedUtterance(
+                description=utterance_options.description if utterance_options.description else "",
+                voice=utterance_options.voice if utterance_options.voice else DEFAULT_VOICE,
+                speed=utterance_options.speed if utterance_options.speed else 1,
+                trailing_silence=utterance_options.trailing_silence
+                if utterance_options.trailing_silence
+                else 0.35,
+                text="",
+            )
+        if is_given(format):
+            self._opts.format = format
+        if is_given(context):
+            self._opts.context = context
+        if is_given(split_utterances):
+            self._opts.split_utterances = split_utterances
+        if is_given(num_generations):
+            self._opts.num_generations = num_generations
+        if is_given(instant_mode):
+            self._opts.instant_mode = instant_mode
+        if is_given(strip_headers):
+            self._opts.strip_headers = strip_headers
+    def synthesize(
+        self,
+        text: str,
+        *,
+        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
+    ) -> ChunkedStream:
+        return ChunkedStream(
+            tts=self,
+            input_text=text,
+            conn_options=conn_options,
+            opts=self._opts,
+        )
+class ChunkedStream(tts.ChunkedStream):
+    """Stream for Hume TTS JSON streaming API."""
+    def __init__(
+        self,
+        *,
+        tts: TTS,
+        input_text: str,
+        opts: _TTSOptions,
+        conn_options: APIConnectOptions,
+    ) -> None:
+        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
+        self._opts = opts
+        self._client = tts._client
+    async def _run(self) -> None:
+        request_id = utils.shortuuid()
+        decoder = utils.codecs.AudioStreamDecoder(
+            sample_rate=self._opts.sample_rate,
+            num_channels=DEFAULT_NUM_CHANNELS,
+        )
+        decode_task: asyncio.Task | None = None
+        try:
+            async def _decode_loop():
+                try:
+                    async for chunk in self._client.tts.synthesize_json_streaming(
+                        utterances=[
+                            PostedUtterance(
+                                text=self._input_text,
+                                description=self._opts.utterance_options.description,
+                                voice=self._opts.utterance_options.voice,
+                                speed=self._opts.utterance_options.speed,
+                                trailing_silence=self._opts.utterance_options.trailing_silence,
+                            )
+                        ],
+                        context=self._opts.context,
+                        format=self._opts.format,
+                        num_generations=self._opts.num_generations,
+                        split_utterances=self._opts.split_utterances,
+                        instant_mode=self._opts.instant_mode,
+                        strip_headers=self._opts.strip_headers,
+                    ):
+                        decoder.push(base64.b64decode(chunk.audio))
+                finally:
+                    decoder.end_input()
+            decode_task = asyncio.create_task(_decode_loop())
+            emitter = tts.SynthesizedAudioEmitter(
+                event_ch=self._event_ch,
+                request_id=request_id,
+            )
+            async for frame in decoder:
+                emitter.push(frame)
+            emitter.flush()
+        except asyncio.TimeoutError:
+            raise APITimeoutError() from None
+        except Exception as e:
+            raise APIConnectionError() from e
+        finally:
+            if decode_task:
+                await utils.aio.gracefully_cancel(decode_task)
+            await decoder.aclose()

livekit/plugins/hume/version.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright 2024 LiveKit, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Version string of this package release.
+__version__ = "1.0.17"

livekit_plugins_hume-1.0.17.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,34 @@
+Metadata-Version: 2.4
+Name: livekit-plugins-hume
+Version: 1.0.17
+Summary: Hume TTS plugin for LiveKit agents
+Project-URL: Documentation, https://docs.livekit.io
+Project-URL: Website, https://livekit.io/
+Project-URL: Source, https://github.com/livekit/agents
+Author-email: LiveKit <info@livekit.io>
+License-Expression: Apache-2.0
+Keywords: Hume,HumeAI,Octave,audio,livekit,realtime,webrtc
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9.0
+Requires-Dist: aiohttp>=3.8.0
+Requires-Dist: hume
+Requires-Dist: livekit-agents>=1.0.17
+Description-Content-Type: text/markdown
+# LiveKit Plugins Hume AI TTS
+LiveKit Agents Framework plugin for [Hume](https://www.hume.ai/) Text-to-Speech API.
+## Installation
+```bash
+pip install livekit-plugins-hume
+```
+You will need an API key from Hume; set it via the `HUME_API_KEY` environment variable. You can get a key from the [Hume platform settings](https://platform.hume.ai/settings/keys).

livekit_plugins_hume-1.0.17.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+livekit/plugins/hume/__init__.py,sha256=CdEjcQRVL3dBso4xBL-zOgCESSqwH0Xdb01VT35P8u0,1362
+livekit/plugins/hume/log.py,sha256=TwpK1FOwgD6Jb0A2nl-9nIgi0q5qWo9HGDrDuV_2g0g,67
+livekit/plugins/hume/models.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+livekit/plugins/hume/py.typed,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
+livekit/plugins/hume/tts.py,sha256=aVlp-PebRsIily2mcsCewuZzcgHKwzbBSYwHcFnSo0w,12029
+livekit/plugins/hume/version.py,sha256=oT9vgJC1WR2E9D9qKy-VZ5neWTTotVE-IZcSbmiQP98,601
+livekit_plugins_hume-1.0.17.dist-info/METADATA,sha256=EpRs_Biq7BWbNk8P-COP1Sgm0LqZiMd6L1Zp--oDsN8,1251
+livekit_plugins_hume-1.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+livekit_plugins_hume-1.0.17.dist-info/RECORD,,

livekit_plugins_hume-1.0.17.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.27.0
+Root-Is-Purelib: true
+Tag: py3-none-any