livekit-plugins-hume 1.0.17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of livekit-plugins-hume might be problematic. Click here for more details.
- livekit_plugins_hume-1.0.17/.gitignore +168 -0
- livekit_plugins_hume-1.0.17/PKG-INFO +34 -0
- livekit_plugins_hume-1.0.17/README.md +11 -0
- livekit_plugins_hume-1.0.17/livekit/plugins/hume/__init__.py +56 -0
- livekit_plugins_hume-1.0.17/livekit/plugins/hume/log.py +3 -0
- livekit_plugins_hume-1.0.17/livekit/plugins/hume/models.py +0 -0
- livekit_plugins_hume-1.0.17/livekit/plugins/hume/py.typed +1 -0
- livekit_plugins_hume-1.0.17/livekit/plugins/hume/tts.py +297 -0
- livekit_plugins_hume-1.0.17/livekit/plugins/hume/version.py +15 -0
- livekit_plugins_hume-1.0.17/pyproject.toml +43 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
**/.vscode
|
|
2
|
+
**/.DS_Store
|
|
3
|
+
|
|
4
|
+
# Byte-compiled / optimized / DLL files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*$py.class
|
|
8
|
+
|
|
9
|
+
# C extensions
|
|
10
|
+
*.so
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
cover/
|
|
56
|
+
|
|
57
|
+
# Translations
|
|
58
|
+
*.mo
|
|
59
|
+
*.pot
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
local_settings.py
|
|
64
|
+
db.sqlite3
|
|
65
|
+
db.sqlite3-journal
|
|
66
|
+
|
|
67
|
+
# Flask stuff:
|
|
68
|
+
instance/
|
|
69
|
+
.webassets-cache
|
|
70
|
+
|
|
71
|
+
# Scrapy stuff:
|
|
72
|
+
.scrapy
|
|
73
|
+
|
|
74
|
+
# Sphinx documentation
|
|
75
|
+
docs/_build/
|
|
76
|
+
|
|
77
|
+
# PyBuilder
|
|
78
|
+
.pybuilder/
|
|
79
|
+
target/
|
|
80
|
+
|
|
81
|
+
# Jupyter Notebook
|
|
82
|
+
.ipynb_checkpoints
|
|
83
|
+
|
|
84
|
+
# IPython
|
|
85
|
+
profile_default/
|
|
86
|
+
ipython_config.py
|
|
87
|
+
|
|
88
|
+
# pyenv
|
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
91
|
+
# .python-version
|
|
92
|
+
|
|
93
|
+
# pipenv
|
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
97
|
+
# install all needed dependencies.
|
|
98
|
+
#Pipfile.lock
|
|
99
|
+
|
|
100
|
+
# poetry
|
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
103
|
+
# commonly ignored for libraries.
|
|
104
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
105
|
+
#poetry.lock
|
|
106
|
+
|
|
107
|
+
# pdm
|
|
108
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
109
|
+
#pdm.lock
|
|
110
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
111
|
+
# in version control.
|
|
112
|
+
# https://pdm.fming.dev/#use-with-ide
|
|
113
|
+
.pdm.toml
|
|
114
|
+
|
|
115
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
116
|
+
__pypackages__/
|
|
117
|
+
|
|
118
|
+
# Celery stuff
|
|
119
|
+
celerybeat-schedule
|
|
120
|
+
celerybeat.pid
|
|
121
|
+
|
|
122
|
+
# SageMath parsed files
|
|
123
|
+
*.sage.py
|
|
124
|
+
|
|
125
|
+
# Environments
|
|
126
|
+
.env
|
|
127
|
+
.venv
|
|
128
|
+
env/
|
|
129
|
+
venv/
|
|
130
|
+
ENV/
|
|
131
|
+
env.bak/
|
|
132
|
+
venv.bak/
|
|
133
|
+
|
|
134
|
+
# Spyder project settings
|
|
135
|
+
.spyderproject
|
|
136
|
+
.spyproject
|
|
137
|
+
|
|
138
|
+
# Rope project settings
|
|
139
|
+
.ropeproject
|
|
140
|
+
|
|
141
|
+
# mkdocs documentation
|
|
142
|
+
/site
|
|
143
|
+
|
|
144
|
+
# mypy
|
|
145
|
+
.mypy_cache/
|
|
146
|
+
.dmypy.json
|
|
147
|
+
dmypy.json
|
|
148
|
+
|
|
149
|
+
# Pyre type checker
|
|
150
|
+
.pyre/
|
|
151
|
+
|
|
152
|
+
# pytype static type analyzer
|
|
153
|
+
.pytype/
|
|
154
|
+
|
|
155
|
+
# Cython debug symbols
|
|
156
|
+
cython_debug/
|
|
157
|
+
|
|
158
|
+
# PyCharm
|
|
159
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
160
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
161
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
162
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
163
|
+
.idea/
|
|
164
|
+
|
|
165
|
+
node_modules
|
|
166
|
+
|
|
167
|
+
credentials.json
|
|
168
|
+
pyrightconfig.json
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: livekit-plugins-hume
|
|
3
|
+
Version: 1.0.17
|
|
4
|
+
Summary: Hume TTS plugin for LiveKit agents
|
|
5
|
+
Project-URL: Documentation, https://docs.livekit.io
|
|
6
|
+
Project-URL: Website, https://livekit.io/
|
|
7
|
+
Project-URL: Source, https://github.com/livekit/agents
|
|
8
|
+
Author-email: LiveKit <info@livekit.io>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: Hume,HumeAI,Octave,audio,livekit,realtime,webrtc
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
18
|
+
Requires-Python: >=3.9.0
|
|
19
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
20
|
+
Requires-Dist: hume
|
|
21
|
+
Requires-Dist: livekit-agents>=1.0.17
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# LiveKit Plugins Hume AI TTS
|
|
25
|
+
|
|
26
|
+
LiveKit Agents Framework plugin for [Hume](https://www.hume.ai/) Text-to-Speech API.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install livekit-plugins-hume
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
You will need an API key from Hume. It can be provided through the `HUME_API_KEY` environment variable, and you can create one on the [Hume platform key settings page](https://platform.hume.ai/settings/keys).
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# LiveKit Plugins Hume AI TTS
|
|
2
|
+
|
|
3
|
+
LiveKit Agents Framework plugin for [Hume](https://www.hume.ai/) Text-to-Speech API.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install livekit-plugins-hume
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
You will need an API key from Hume. It can be provided through the `HUME_API_KEY` environment variable, and you can create one on the [Hume platform key settings page](https://platform.hume.ai/settings/keys).
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
# Take the version from version.py (the file the build reads the package version
# from) instead of a hard-coded literal, so the registered plugin version cannot
# drift from the released package version.
from .version import __version__
|
|
18
|
+
|
|
19
|
+
# make imports available
|
|
20
|
+
from hume.tts import (
|
|
21
|
+
Format,
|
|
22
|
+
PostedContext,
|
|
23
|
+
PostedUtterance,
|
|
24
|
+
PostedUtteranceVoiceWithId,
|
|
25
|
+
PostedUtteranceVoiceWithName,
|
|
26
|
+
)
|
|
27
|
+
from livekit.agents import Plugin
|
|
28
|
+
|
|
29
|
+
from .tts import TTS
|
|
30
|
+
|
|
31
|
+
# all exports
|
|
32
|
+
__all__ = [
|
|
33
|
+
"TTS",
|
|
34
|
+
"Format",
|
|
35
|
+
"PostedUtterance",
|
|
36
|
+
"PostedContext",
|
|
37
|
+
"PostedUtteranceVoiceWithName",
|
|
38
|
+
"PostedUtteranceVoiceWithId",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class HumeAIPlugin(Plugin):
    """Plugin descriptor for this package, built on the LiveKit ``Plugin`` base class."""

    def __init__(self) -> None:
        # Hand this module's name, the plugin version and the package name to the
        # base-class initializer, in that positional order.
        Plugin.__init__(self, __name__, __version__, __package__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Register one plugin instance as a side effect of importing this package.
Plugin.register_plugin(HumeAIPlugin())

# Cleanup docs of unexported modules: every module-level name that is not listed
# in __all__ gets a False entry in __pdoc__.
# dir() is captured first, so the helper names defined below are not part of it.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__.update({n: False})
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
# Copyright 2023 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
import base64
|
|
19
|
+
import os
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
|
|
22
|
+
import aiohttp
|
|
23
|
+
|
|
24
|
+
from hume import AsyncHumeClient
|
|
25
|
+
from hume.tts import Format, FormatWav, PostedContext, PostedUtterance, PostedUtteranceVoiceWithName
|
|
26
|
+
from livekit.agents import (
|
|
27
|
+
APIConnectionError,
|
|
28
|
+
APIConnectOptions,
|
|
29
|
+
APITimeoutError,
|
|
30
|
+
tokenize,
|
|
31
|
+
tts,
|
|
32
|
+
utils,
|
|
33
|
+
)
|
|
34
|
+
from livekit.agents.types import (
|
|
35
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
|
36
|
+
NOT_GIVEN,
|
|
37
|
+
NotGivenOr,
|
|
38
|
+
)
|
|
39
|
+
from livekit.agents.utils import is_given
|
|
40
|
+
|
|
41
|
+
# Default audio settings (sample rate in Hz, single channel)
DEFAULT_SAMPLE_RATE = 24000
DEFAULT_NUM_CHANNELS = 1

# Default TTS settings: the voice used for DEFAULT_UTTERANCE
DEFAULT_VOICE = PostedUtteranceVoiceWithName(name="Colton Rivers", provider="HUME_AI")

# PostedUtterance requires ``text``; it is declared here as an empty string and
# is overwritten when input tokens are received.
DEFAULT_UTTERANCE = PostedUtterance(
    text="",
    description="",
    voice=DEFAULT_VOICE,
    speed=1,
    trailing_silence=0.35,
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class _TTSOptions:
    """Settings the Hume TTS plugin keeps and hands to every synthesis stream."""

    # Hume API key.
    api_key: str
    # Template utterance; its description, voice, speed and trailing_silence are
    # copied into each request (its text is a placeholder).
    utterance_options: PostedUtterance
    # Optional context passed through to the synthesis call.
    context: PostedContext | None
    # Output audio format requested from the API.
    format: Format
    # Sample rate handed to the audio decoder.
    sample_rate: int
    # The following four flags/counters are forwarded unchanged to the API call.
    split_utterances: bool
    strip_headers: bool
    num_generations: int
    instant_mode: bool
    # Word tokenizer chosen at construction time.
    word_tokenizer: tokenize.WordTokenizer
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TTS(tts.TTS):
    """Hume text-to-speech plugin.

    Declares ``streaming=False`` capabilities; audio is obtained through
    ``synthesize``, which returns a ``ChunkedStream``.
    """

    def __init__(
        self,
        *,
        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
        context: NotGivenOr[PostedContext] = NOT_GIVEN,
        format: NotGivenOr[Format] = NOT_GIVEN,
        split_utterances: bool = False,
        num_generations: int = 1,
        instant_mode: bool = False,
        strip_headers: bool = True,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        word_tokenizer: tokenize.WordTokenizer | None = None,
        http_session: aiohttp.ClientSession | None = None,
        sample_rate: int = DEFAULT_SAMPLE_RATE,
    ) -> None:
        """Initialize the Hume TTS client.

        See https://dev.hume.ai/reference/text-to-speech-tts/synthesize-json-streaming for API doc

        Args:
            utterance_options (NotGivenOr[PostedUtterance]): Default options for utterances,
                including description, voice, and delivery controls.
                Defaults to ``DEFAULT_UTTERANCE``.
            context (NotGivenOr[PostedContext]): Utterances to use as context for generating
                consistent speech style and prosody across multiple requests.
            format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
                Defaults to WAV format.
            split_utterances (bool): Controls how audio output is segmented in the response.
                When enabled (True), input utterances are split into natural-sounding segments.
                When disabled (False), maintains one-to-one mapping between input and output.
                Defaults to False.
            num_generations (int): Number of generations of the audio to produce.
                Must be between 1 and 5. Defaults to 1.
            instant_mode (bool): Enables ultra-low latency streaming, reducing time to first chunk.
                Recommended for real-time applications. Only for streaming endpoints.
                With this enabled, requests incur 10% higher cost. Defaults to False.
            strip_headers (bool): If enabled, the audio for all the chunks of a generation,
                once concatenated together, will constitute a single audio file.
                If disabled, each chunk's audio will be its own audio file, each with its headers.
                Defaults to True.
            api_key (NotGivenOr[str]): Hume API key for authentication. If not provided,
                will attempt to read from HUME_API_KEY environment variable.
            word_tokenizer (tokenize.WordTokenizer | None): Custom word tokenizer to use for text.
                If None, a basic word tokenizer will be used.
            http_session (aiohttp.ClientSession | None): Optional HTTP session for API requests.
                If None, one is obtained lazily by ``_ensure_session``.
            sample_rate (int): Audio sample rate in Hz. Defaults to 24000.

        Raises:
            ValueError: If no API key is passed and HUME_API_KEY is unset or empty.
        """

        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=False,
            ),
            sample_rate=sample_rate,
            num_channels=DEFAULT_NUM_CHANNELS,
        )

        self._api_key = api_key if is_given(api_key) else os.environ.get("HUME_API_KEY")
        if not self._api_key:
            raise ValueError(
                "Hume API key is required, either as argument or set HUME_API_KEY env variable"
            )

        if not word_tokenizer:
            word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)

        self._opts = _TTSOptions(
            utterance_options=utterance_options
            if is_given(utterance_options)
            else DEFAULT_UTTERANCE,
            context=context if is_given(context) else None,
            format=format if is_given(format) else FormatWav(),
            api_key=self._api_key,
            sample_rate=self.sample_rate,
            split_utterances=split_utterances,
            num_generations=num_generations,
            strip_headers=strip_headers,
            instant_mode=instant_mode,
            word_tokenizer=word_tokenizer,
        )

        # NOTE(review): the client is built from the API key only; the optional
        # http_session is stored but not handed to it here.
        self._client = AsyncHumeClient(api_key=self._api_key)
        self._session = http_session

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the stored HTTP session, fetching one from ``utils.http_context`` if unset."""
        if not self._session:
            self._session = utils.http_context.http_session()
        return self._session

    def update_options(
        self,
        *,
        utterance_options: NotGivenOr[PostedUtterance] = NOT_GIVEN,
        context: NotGivenOr[PostedContext] = NOT_GIVEN,
        format: NotGivenOr[Format] = NOT_GIVEN,
        split_utterances: NotGivenOr[bool] = NOT_GIVEN,
        num_generations: NotGivenOr[int] = NOT_GIVEN,
        instant_mode: NotGivenOr[bool] = NOT_GIVEN,
        strip_headers: NotGivenOr[bool] = NOT_GIVEN,
    ) -> None:
        """Update TTS options for synthesizing speech.

        Only the arguments that are given are changed; everything else keeps its
        current value.

        Args:
            utterance_options (NotGivenOr[PostedUtterance]): Options for utterances,
                including description, voice, and additional controls. Its text is
                ignored; unset fields fall back to the defaults.
            context (NotGivenOr[PostedContext]): Utterances to use as context for generating
                consistent speech style and prosody across multiple requests.
            format (NotGivenOr[Format]): Specifies the output audio file format (WAV, MP3 or PCM).
            split_utterances (NotGivenOr[bool]): Controls how audio output is segmented.
                When True, utterances are split into natural-sounding segments.
                When False, maintains one-to-one mapping between input and output.
            num_generations (NotGivenOr[int]): Number of speech generations to produce (1-5).
            instant_mode (NotGivenOr[bool]): Enables ultra-low latency streaming.
                Reduces time to first audio chunk, recommended for real-time applications.
                Note: Incurs 10% higher cost when enabled.
            strip_headers (NotGivenOr[bool]): If enabled, the audio for the chunks of a
                generation, once concatenated together, will constitute a single audio file.
                If disabled, each chunk's audio will be its own audio file, each with its headers.
        """

        if is_given(utterance_options):
            trailing_silence = utterance_options.trailing_silence
            # text is required in PostedUtterance but it is declared as an empty string
            # it will be overwritten when input tokens are received
            self._opts.utterance_options = PostedUtterance(
                description=utterance_options.description or "",
                voice=utterance_options.voice or DEFAULT_VOICE,
                speed=utterance_options.speed or 1,
                # Compare against None so that an explicit trailing_silence of 0
                # is kept instead of being replaced by the default.
                trailing_silence=trailing_silence if trailing_silence is not None else 0.35,
                text="",
            )
        if is_given(format):
            self._opts.format = format
        if is_given(context):
            self._opts.context = context
        if is_given(split_utterances):
            self._opts.split_utterances = split_utterances
        if is_given(num_generations):
            self._opts.num_generations = num_generations
        if is_given(instant_mode):
            self._opts.instant_mode = instant_mode
        if is_given(strip_headers):
            self._opts.strip_headers = strip_headers

    def synthesize(
        self,
        text: str,
        *,
        conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
    ) -> ChunkedStream:
        """Create a ``ChunkedStream`` that synthesizes ``text``.

        The stream is given this instance's options object itself (not a copy).

        Args:
            text (str): Text to synthesize.
            conn_options (APIConnectOptions): Connection options forwarded to the stream.

        Returns:
            ChunkedStream: Stream bound to this TTS instance.
        """
        return ChunkedStream(
            tts=self,
            input_text=text,
            conn_options=conn_options,
            opts=self._opts,
        )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class ChunkedStream(tts.ChunkedStream):
    """Stream for Hume TTS JSON streaming API."""

    def __init__(
        self,
        *,
        tts: TTS,
        input_text: str,
        opts: _TTSOptions,
        conn_options: APIConnectOptions,
    ) -> None:
        """Bind the stream to its TTS instance, input text and options.

        Args:
            tts (TTS): Owning TTS instance; its Hume client is reused.
            input_text (str): Text to synthesize.
            opts (_TTSOptions): Options used to build the request.
            conn_options (APIConnectOptions): Connection options for the base stream.
        """
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._opts = opts
        self._client = tts._client

    async def _run(self) -> None:
        """Request audio from Hume, decode it and emit the resulting frames.

        A background task feeds base64-decoded audio chunks into the decoder
        while this coroutine forwards decoded frames to the emitter.

        Raises:
            APITimeoutError: If an ``asyncio.TimeoutError`` occurs.
            APIConnectionError: If any other exception occurs, including one
                raised by the request task.
        """
        request_id = utils.shortuuid()

        decoder = utils.codecs.AudioStreamDecoder(
            sample_rate=self._opts.sample_rate,
            num_channels=DEFAULT_NUM_CHANNELS,
        )

        async def _decode_loop() -> None:
            # Producer: push every received audio chunk into the decoder.
            try:
                async for chunk in self._client.tts.synthesize_json_streaming(
                    utterances=[
                        PostedUtterance(
                            text=self._input_text,
                            description=self._opts.utterance_options.description,
                            voice=self._opts.utterance_options.voice,
                            speed=self._opts.utterance_options.speed,
                            trailing_silence=self._opts.utterance_options.trailing_silence,
                        )
                    ],
                    context=self._opts.context,
                    format=self._opts.format,
                    num_generations=self._opts.num_generations,
                    split_utterances=self._opts.split_utterances,
                    instant_mode=self._opts.instant_mode,
                    strip_headers=self._opts.strip_headers,
                ):
                    decoder.push(base64.b64decode(chunk.audio))
            finally:
                # Always end the input so the frame loop below terminates,
                # even when the request fails.
                decoder.end_input()

        decode_task: asyncio.Task | None = None

        try:
            decode_task = asyncio.create_task(_decode_loop())
            emitter = tts.SynthesizedAudioEmitter(
                event_ch=self._event_ch,
                request_id=request_id,
            )
            async for frame in decoder:
                emitter.push(frame)

            # The frame loop also ends normally when the request task failed
            # (its finally block ends the decoder input). Await the task so such
            # a failure reaches the handlers below instead of going unnoticed.
            # NOTE(review): assumes gracefully_cancel does not re-raise it - confirm.
            await decode_task

            emitter.flush()

        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except Exception as e:
            raise APIConnectionError() from e
        finally:
            if decode_task:
                await utils.aio.gracefully_cancel(decode_task)
            await decoder.aclose()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2024 LiveKit, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
__version__ = "1.0.17"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "livekit-plugins-hume"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
description = "Hume TTS plugin for LiveKit agents"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
requires-python = ">=3.9.0"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "LiveKit", email = "info@livekit.io"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["webrtc", "realtime", "audio", "livekit", "HumeAI", "Hume", "Octave"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Topic :: Multimedia :: Sound/Audio",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"aiohttp>=3.8.0",
|
|
27
|
+
"livekit-agents>=1.0.17",
|
|
28
|
+
"hume"
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Documentation = "https://docs.livekit.io"
|
|
33
|
+
Website = "https://livekit.io/"
|
|
34
|
+
Source = "https://github.com/livekit/agents"
|
|
35
|
+
|
|
36
|
+
[tool.hatch.version]
|
|
37
|
+
path = "livekit/plugins/hume/version.py"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["livekit"]
|
|
41
|
+
|
|
42
|
+
[tool.hatch.build.targets.sdist]
|
|
43
|
+
include = ["/livekit"]
|