livekit-plugins-upliftai 1.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ **/.vscode
2
+ **/.DS_Store
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # trunk
150
+ .trunk/
151
+
152
+ # Pyre type checker
153
+ .pyre/
154
+
155
+ # pytype static type analyzer
156
+ .pytype/
157
+
158
+ # Cython debug symbols
159
+ cython_debug/
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ .idea/
167
+
168
+ node_modules
169
+
170
+ credentials.json
171
+ pyrightconfig.json
172
+ docs/
173
+
174
+ # Database files
175
+ *.db
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-upliftai
3
+ Version: 1.2.7
4
+ Summary: Agent Framework plugin for speech synthesis with the Uplift AI.
5
+ Project-URL: Documentation, https://docs.livekit.io
6
+ Project-URL: Website, https://livekit.io/
7
+ Project-URL: Source, https://github.com/livekit/agents
8
+ Author-email: LiveKit <hello@livekit.io>
9
+ License-Expression: Apache-2.0
10
+ Keywords: audio,livekit,multilingual,realtime,tts,upliftai,urdu,video,webrtc
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Topic :: Multimedia :: Sound/Audio
18
+ Classifier: Topic :: Multimedia :: Video
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9.0
21
+ Requires-Dist: livekit-agents[codecs]>=1.2.7
22
+ Requires-Dist: numpy>=1.26
23
+ Description-Content-Type: text/markdown
24
+
25
+ # Uplift AI plugin for LiveKit Agents
26
+
27
+ Support for voice synthesis with [Uplift AI](https://upliftai.org) for underserved languages.
28
+
29
+ See [https://docs.upliftai.org/orator_voices](https://docs.upliftai.org/orator_voices) for supported voices and languages.
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install livekit-plugins-upliftai
35
+ ```
36
+
37
+ ## Pre-requisites
38
+
39
+ You'll need an API key from Uplift AI. It can be set as an environment variable: `UPLIFTAI_API_KEY`. You can get your API key by signing up at [https://upliftai.org](https://upliftai.org).
40
+
41
+
42
+ ## Tutorial
43
+
44
+ Follow along at [https://docs.upliftai.org/tutorials/livekit-voice-agent](https://docs.upliftai.org/tutorials/livekit-voice-agent) where we build a voice agent using LiveKit and Uplift AI.
@@ -0,0 +1,20 @@
1
+ # Uplift AI plugin for LiveKit Agents
2
+
3
+ Support for voice synthesis with [Uplift AI](https://upliftai.org) for underserved languages.
4
+
5
+ See [https://docs.upliftai.org/orator_voices](https://docs.upliftai.org/orator_voices) for supported voices and languages.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install livekit-plugins-upliftai
11
+ ```
12
+
13
+ ## Pre-requisites
14
+
15
+ You'll need an API key from Uplift AI. It can be set as an environment variable: `UPLIFTAI_API_KEY`. You can get your API key by signing up at [https://upliftai.org](https://upliftai.org).
16
+
17
+
18
+ ## Tutorial
19
+
20
+ Follow along at [https://docs.upliftai.org/tutorials/livekit-voice-agent](https://docs.upliftai.org/tutorials/livekit-voice-agent) where we build a voice agent using LiveKit and Uplift AI.
@@ -0,0 +1,49 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Uplift AI plugin for LiveKit Agents
16
+
17
+ See https://docs.livekit.io/agents/integrations/tts/upliftai/ for more information.
18
+ """
19
+
20
+ from .tts import DEFAULT_VOICE_ID, TTS, OutputFormat
21
+ from .version import __version__
22
+
23
+ __all__ = [
24
+ "TTS",
25
+ "OutputFormat",
26
+ "DEFAULT_VOICE_ID",
27
+ "__version__",
28
+ ]
29
+
30
+ from livekit.agents import Plugin
31
+
32
+ from .log import logger
33
+
34
+
35
class UpliftAIPlugin(Plugin):
    """Plugin descriptor for the Uplift AI integration."""

    def __init__(self) -> None:
        # Describe the plugin with this module's name, the package version and
        # package name, and hand over the package logger.
        super().__init__(__name__, __version__, __package__, logger)
38
+
39
+
40
# Register the plugin as a side effect of importing this package.
Plugin.register_plugin(UpliftAIPlugin())

# Cleanup docs of unexported modules: every module-level name that is not
# listed in ``__all__`` is mapped to ``False`` in ``__pdoc__``.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

__pdoc__ = dict.fromkeys(NOT_IN_ALL, False)
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("livekit.plugins.upliftai")
@@ -0,0 +1,515 @@
1
+ """
2
+ Uplift AI text-to-speech (TTS) plugin for LiveKit Agents, distributed as the `livekit-plugins-upliftai` Python package.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import base64
9
+ import os
10
+ import time
11
+ import uuid
12
+ import weakref
13
+ from dataclasses import dataclass
14
+ from typing import Literal
15
+
16
+ import socketio
17
+
18
+ from livekit.agents import (
19
+ APIConnectionError,
20
+ APIConnectOptions,
21
+ APIError,
22
+ APITimeoutError,
23
+ tokenize,
24
+ tts,
25
+ utils,
26
+ )
27
+ from livekit.agents.types import DEFAULT_API_CONNECT_OPTIONS, NOT_GIVEN, NotGivenOr
28
+ from livekit.agents.utils import is_given
29
+
30
+ from .log import logger
31
+
32
# Output format identifiers that can be requested from the service. They are
# sent verbatim as the "outputFormat" field of a synthesis request.
OutputFormat = Literal[
    "PCM_22050_16",
    "WAV_22050_16",
    "WAV_22050_32",
    "MP3_22050_32",
    "MP3_22050_64",
    "MP3_22050_128",
    "OGG_22050_16",
    "ULAW_8000_8",
]

# Default configuration
# Endpoint used when neither the ``base_url`` argument nor the
# UPLIFTAI_BASE_URL environment variable is provided.
DEFAULT_BASE_URL = "wss://api.upliftai.org"
# Sample rate (Hz) and channel count reported to the LiveKit TTS base class.
DEFAULT_SAMPLE_RATE = 22050
DEFAULT_NUM_CHANNELS = 1
DEFAULT_VOICE_ID = "v_meklc281"
DEFAULT_OUTPUT_FORMAT: OutputFormat = "MP3_22050_32"
# Socket.IO namespace on which synthesis requests and audio events are exchanged.
WEBSOCKET_NAMESPACE = "/text-to-speech/multi-stream"
51
+
52
+
53
def get_content_type_from_output_format(output_format: OutputFormat) -> str:
    """Return the MIME type of the audio produced for ``output_format``.

    PCM, WAV and mu-law identifiers must match exactly; MP3 and OGG are
    recognised by prefix, so any identifier starting with "MP3" or "OGG" is
    accepted.

    Raises:
        ValueError: If ``output_format`` matches none of the known formats.
    """
    exact_matches = {
        "PCM_22050_16": "audio/pcm",
        "WAV_22050_16": "audio/wav",
        "WAV_22050_32": "audio/wav",
        "ULAW_8000_8": "audio/x-mulaw",
    }
    if output_format in exact_matches:
        return exact_matches[output_format]

    prefix_matches = (("MP3", "audio/mpeg"), ("OGG", "audio/ogg"))
    for prefix, mime_type in prefix_matches:
        if output_format.startswith(prefix):
            return mime_type

    raise ValueError(f"Unsupported output format: {output_format}")
69
+
70
+
71
@dataclass
class VoiceSettings:
    """Per-request voice parameters sent with every synthesis request."""

    # Identifier of the voice, sent as "voiceId".
    voice_id: str = DEFAULT_VOICE_ID
    # Requested audio encoding, sent as "outputFormat".
    output_format: OutputFormat = DEFAULT_OUTPUT_FORMAT
77
+
78
+
79
@dataclass
class _TTSOptions:
    """Resolved configuration shared by a ``TTS`` instance, its client and its streams."""

    # Service endpoint the Socket.IO client connects to.
    base_url: str
    # Sent as the "token" entry of the connection auth payload.
    api_key: str
    # Voice and output format; mutated in place by ``TTS.update_options``.
    voice_settings: VoiceSettings
    # Tokenizer whose streams split pushed text in ``SynthesizeStream``.
    word_tokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer
    # Audio parameters passed to the output emitter.
    sample_rate: int
    num_channels: int
89
+
90
+
91
class TTS(tts.TTS):
    """Uplift TTS implementation for LiveKit.

    Requests go through a ``WebSocketClient`` that the first stream to run
    creates on demand and stores on this instance, so all streams created from
    the same ``TTS`` share one connection.
    """

    def __init__(
        self,
        *,
        base_url: NotGivenOr[str] = NOT_GIVEN,
        api_key: NotGivenOr[str] = NOT_GIVEN,
        voice_id: str = DEFAULT_VOICE_ID,
        output_format: OutputFormat = DEFAULT_OUTPUT_FORMAT,
        num_channels: int = DEFAULT_NUM_CHANNELS,
        word_tokenizer: NotGivenOr[tokenize.WordTokenizer | tokenize.SentenceTokenizer] = NOT_GIVEN,
    ) -> None:
        """
        Create a new instance of Uplift TTS.

        The sample rate is not configurable; it is always ``DEFAULT_SAMPLE_RATE``
        (22050 Hz).

        Args:
            base_url: Base URL for TTS service. Falls back to the
                ``UPLIFTAI_BASE_URL`` environment variable, then to
                wss://api.upliftai.org
            api_key: API key for authentication. Falls back to the
                ``UPLIFTAI_API_KEY`` environment variable
            voice_id: Voice ID to use. Defaults to ``DEFAULT_VOICE_ID``
            output_format: Audio output format. Options:
                - 'PCM_22050_16': PCM format, 22.05kHz, 16-bit
                - 'WAV_22050_16': WAV format, 22.05kHz, 16-bit
                - 'WAV_22050_32': WAV format, 22.05kHz, 32-bit
                - 'MP3_22050_32': MP3 format, 22.05kHz, 32kbps (default)
                - 'MP3_22050_64': MP3 format, 22.05kHz, 64kbps
                - 'MP3_22050_128': MP3 format, 22.05kHz, 128kbps
                - 'OGG_22050_16': OGG format, 22.05kHz, 16-bit
                - 'ULAW_8000_8': μ-law format, 8kHz, 8-bit
            num_channels: Number of audio channels. Defaults to 1 (mono)
            word_tokenizer: Tokenizer used to split streamed text. Defaults to
                ``tokenize.basic.WordTokenizer(ignore_punctuation=False)``

        Raises:
            ValueError: If no API key is passed and ``UPLIFTAI_API_KEY`` is
                unset or empty.
        """
        # NOTE(review): DEFAULT_SAMPLE_RATE is reported even when output_format
        # is 'ULAW_8000_8' (8 kHz) -- confirm that format is handled correctly
        # downstream.
        super().__init__(
            capabilities=tts.TTSCapabilities(
                streaming=True,
                aligned_transcript=False,
            ),
            sample_rate=DEFAULT_SAMPLE_RATE,
            num_channels=num_channels,
        )

        # Get configuration from environment if not provided
        base_url = (
            base_url
            if is_given(base_url)
            else os.environ.get("UPLIFTAI_BASE_URL", DEFAULT_BASE_URL)
        )
        api_key = api_key if is_given(api_key) else os.environ.get("UPLIFTAI_API_KEY")

        if not api_key:
            raise ValueError(
                "API key is required, either as argument or set UPLIFTAI_API_KEY environment variable"
            )

        # Use provided tokenizer or create default
        if not is_given(word_tokenizer):
            word_tokenizer = tokenize.basic.WordTokenizer(ignore_punctuation=False)

        self._opts = _TTSOptions(
            base_url=base_url,
            api_key=api_key,
            voice_settings=VoiceSettings(voice_id=voice_id, output_format=output_format),
            word_tokenizer=word_tokenizer,
            sample_rate=DEFAULT_SAMPLE_RATE,
            num_channels=num_channels,
        )

        # Created by the first stream that runs; see ChunkedStream / SynthesizeStream.
        self._client: WebSocketClient | None = None
        # Weak references, so streams dropped by the caller do not stay alive here.
        self._streams = weakref.WeakSet[SynthesizeStream]()

    def update_options(
        self,
        *,
        voice_id: NotGivenOr[str] = NOT_GIVEN,
        output_format: NotGivenOr[OutputFormat] = NOT_GIVEN,
    ) -> None:
        """
        Update TTS configuration options.

        The options object is shared with the client, so new values apply to
        every request sent after this call.

        Args:
            voice_id: New voice ID
            output_format: New output format (see __init__ for options)
        """
        if is_given(voice_id):
            self._opts.voice_settings.voice_id = voice_id
        if is_given(output_format):
            self._opts.voice_settings.output_format = output_format

    def synthesize(
        self, text: str, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> ChunkedStream:
        """Synthesize text to speech using chunked stream."""
        return ChunkedStream(tts=self, input_text=text, conn_options=conn_options)

    def stream(
        self, *, conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS
    ) -> SynthesizeStream:
        """Create a streaming synthesis session."""
        stream = SynthesizeStream(tts=self, conn_options=conn_options)
        # Tracked so aclose() can close streams that are still open.
        self._streams.add(stream)
        return stream

    async def aclose(self) -> None:
        """Close all open streams and disconnect the shared client.

        The client is disconnected even if closing one of the streams raises.
        """
        try:
            for stream in list(self._streams):
                await stream.aclose()
        finally:
            self._streams.clear()

            # Detach the client before disconnecting so a failing disconnect
            # cannot leave a half-closed client attached to this instance.
            client, self._client = self._client, None
            if client:
                await client.disconnect()
204
+
205
+
206
class WebSocketClient:
    """Manages the Socket.IO connection to the TTS service.

    Several synthesis requests can be in flight on one connection. Each request
    has an id and an ``asyncio.Queue``; decoded audio chunks for that id are put
    on the queue, and a ``None`` item marks the end of the request.
    """

    def __init__(self, opts: _TTSOptions):
        self.opts = opts
        self.sio: socketio.AsyncClient | None = None
        # Set when the server sends "ready", or when the socket reports
        # connected after the ready wait in connect() expires.
        self.connected = False
        # request id -> queue that receives the audio chunks of that request
        self.audio_callbacks: dict[str, asyncio.Queue] = {}
        # request id -> True while chunks for the request should be delivered
        self.active_requests: dict[str, bool] = {}
        # Serialises connect(). Created on first use rather than here so that
        # constructing the client does not require a running event loop.
        self._connect_lock: asyncio.Lock | None = None

    async def connect(self) -> bool:
        """Establish the connection if there is none.

        Concurrent callers are serialised, so only one socket is created.

        Returns:
            True when the client is connected, False if connecting failed
            (the failure is logged, not raised).
        """
        if self.connected:
            return True

        if self._connect_lock is None:
            self._connect_lock = asyncio.Lock()

        async with self._connect_lock:
            # Another caller may have connected while this one waited for the lock.
            if self.connected:
                return True

            try:
                sio = socketio.AsyncClient(
                    reconnection=True,
                    reconnection_attempts=3,
                    reconnection_delay=1,
                    logger=False,
                    engineio_logger=False,
                )
                self.sio = sio

                # Register handlers
                sio.on("message", self._on_message, namespace=WEBSOCKET_NAMESPACE)
                sio.on("connect", self._on_connect, namespace=WEBSOCKET_NAMESPACE)
                sio.on("disconnect", self._on_disconnect, namespace=WEBSOCKET_NAMESPACE)

                # Connect, authenticating with the API key
                await sio.connect(
                    self.opts.base_url,
                    auth={"token": self.opts.api_key},
                    namespaces=[WEBSOCKET_NAMESPACE],
                    transports=["websocket"],
                    wait_timeout=10,
                )

                # Wait up to 5 seconds for the "ready" message. A monotonic
                # clock is used so wall-clock adjustments cannot shorten or
                # extend the wait.
                deadline = time.monotonic() + 5.0
                while not self.connected and time.monotonic() < deadline:
                    await asyncio.sleep(0.1)

                # No "ready" received: fall back to the socket's own state.
                if not self.connected and sio.connected:
                    self.connected = True

                return self.connected

            except Exception as e:
                logger.error(f"Connection failed: {e}")
                return False

    async def synthesize(self, text: str, request_id: str | None = None) -> asyncio.Queue:
        """Send a synthesis request and return the queue its audio arrives on.

        Args:
            text: Text to synthesize.
            request_id: Id for the request; a UUID is generated when omitted.

        Returns:
            Queue yielding ``bytes`` chunks followed by a final ``None``.

        Raises:
            ConnectionError: If the connection cannot be established.
            Exception: Whatever the emit raises; the request is unregistered first.
        """
        if not self.sio or not self.connected:
            if not await self.connect():
                raise ConnectionError("Failed to connect to TTS service")

        if not request_id:
            request_id = str(uuid.uuid4())

        # Register the request before emitting so no early chunk is missed.
        audio_queue: asyncio.Queue = asyncio.Queue()
        self.audio_callbacks[request_id] = audio_queue
        self.active_requests[request_id] = True

        # Build message
        message = {
            "type": "synthesize",
            "requestId": request_id,
            "text": text,
            "voiceId": self.opts.voice_settings.voice_id,
            "outputFormat": self.opts.voice_settings.output_format,
        }

        logger.debug(f"Sending synthesis request {request_id[:8]} for text: '{text[:50]}...'")

        try:
            await self.sio.emit("synthesize", message, namespace=WEBSOCKET_NAMESPACE)
        except Exception as e:
            logger.error(f"Failed to emit synthesis: {e}")
            # pop() rather than del: the disconnect handler may already have
            # cleared both maps, and a KeyError here would hide the real error.
            self.audio_callbacks.pop(request_id, None)
            self.active_requests.pop(request_id, None)
            raise

        return audio_queue

    async def disconnect(self):
        """Disconnect from the service."""
        sio = self.sio
        # Also close a socket that is up but has not been marked ready yet.
        if sio and (self.connected or sio.connected):
            await sio.disconnect()
        self.connected = False

    def _finish_request(self, request_id) -> None:
        """Signal end-of-audio to a request's consumer and forget the request."""
        queue = self.audio_callbacks.pop(request_id, None)
        if queue is not None:
            # The queue is unbounded, so put_nowait cannot fail with QueueFull.
            queue.put_nowait(None)
        self.active_requests.pop(request_id, None)

    async def _on_connect(self):
        """Handle connection"""
        logger.debug("WebSocket connected")

    async def _on_message(self, data):
        """Dispatch a server message according to its "type" field."""
        if not isinstance(data, dict):
            logger.warning(f"Ignoring unexpected message payload of type {type(data).__name__}")
            return

        message_type = data.get("type")

        if message_type == "ready":
            self.connected = True
            logger.debug(f"Ready with session: {data.get('sessionId')}")

        elif message_type == "audio":
            request_id = data.get("requestId")
            audio_b64 = data.get("audio")
            queue = self.audio_callbacks.get(request_id)

            # Chunks for unknown or inactive requests are dropped.
            if audio_b64 and queue is not None and self.active_requests.get(request_id, False):
                await queue.put(base64.b64decode(audio_b64))

        elif message_type == "audio_end":
            self._finish_request(data.get("requestId"))

        elif message_type == "error":
            request_id = data.get("requestId", "unknown")
            error_msg = data.get("message", str(data))
            logger.error(f"Error for {request_id}: {error_msg}")

            # NOTE(review): an error ends the queue with the same None sentinel
            # as a normal completion, so consumers cannot tell a failed request
            # from a finished one.
            self._finish_request(request_id)

    async def _on_disconnect(self):
        """Mark the client disconnected and end every pending request."""
        self.connected = False
        pending = list(self.audio_callbacks.values())
        self.audio_callbacks.clear()
        self.active_requests.clear()
        for queue in pending:
            queue.put_nowait(None)
350
+
351
+
352
class ChunkedStream(tts.ChunkedStream):
    """Chunked synthesis: the whole input text is sent as a single request."""

    def __init__(self, *, tts: TTS, input_text: str, conn_options: APIConnectOptions) -> None:
        super().__init__(tts=tts, input_text=input_text, conn_options=conn_options)
        self._tts: TTS = tts

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Request synthesis of the input text and push the audio to the emitter.

        Raises:
            APITimeoutError: If no audio chunk arrives within 30 seconds.
            APIConnectionError: For any other failure.
        """
        request_id = utils.shortuuid()
        client: WebSocketClient | None = None

        try:
            # Initialize emitter
            output_emitter.initialize(
                request_id=request_id,
                sample_rate=self._tts._opts.sample_rate,
                num_channels=self._tts._opts.num_channels,
                mime_type=get_content_type_from_output_format(
                    self._tts._opts.voice_settings.output_format
                ),
            )

            # Create the shared client if no stream has done so yet
            if not self._tts._client:
                self._tts._client = WebSocketClient(self._tts._opts)
            client = self._tts._client

            # Get audio queue
            audio_queue = await client.synthesize(self._input_text, request_id)

            # Stream audio until the None end marker. A stalled stream raises
            # asyncio.TimeoutError, which is reported below rather than being
            # treated as a normal end of audio.
            while True:
                audio_data = await asyncio.wait_for(audio_queue.get(), timeout=30.0)
                if audio_data is None:
                    break
                output_emitter.push(audio_data)

            output_emitter.flush()

        except asyncio.TimeoutError as e:
            logger.warning("Audio timeout")
            raise APITimeoutError() from e
        except Exception as e:
            raise APIConnectionError(f"TTS synthesis failed: {str(e)}") from e
        finally:
            # Unregister the request on every exit path (timeout, error,
            # cancellation) so the client does not keep its queue.
            if client is not None:
                client.audio_callbacks.pop(request_id, None)
                client.active_requests.pop(request_id, None)
401
+
402
+
403
class SynthesizeStream(tts.SynthesizeStream):
    """Streaming synthesis.

    Text pushed between two flushes forms one segment. A segment is sent to the
    service as a single request once all of its text has been tokenized.
    """

    def __init__(self, *, tts: TTS, conn_options: APIConnectOptions):
        super().__init__(tts=tts, conn_options=conn_options)
        self._tts: TTS = tts
        # Carries one tokenizer stream per segment from the input reader to the
        # segment processor. The element type is given as an annotation only:
        # subscripting Chan with ``A | B`` would evaluate the union at runtime,
        # which fails on Python 3.9.
        self._segments_ch: utils.aio.Chan[tokenize.WordStream | tokenize.SentenceStream] = (
            utils.aio.Chan()
        )

    async def _run(self, output_emitter: tts.AudioEmitter) -> None:
        """Tokenize the input into segments and synthesize them one at a time.

        Raises:
            APITimeoutError: If a segment's audio stalls.
            APIError: If a segment fails for another reason.
            APIConnectionError: For any other failure.
        """
        request_id = utils.shortuuid()

        output_emitter.initialize(
            request_id=request_id,
            sample_rate=self._tts._opts.sample_rate,
            num_channels=self._tts._opts.num_channels,
            stream=True,
            mime_type=get_content_type_from_output_format(
                self._tts._opts.voice_settings.output_format
            ),
        )

        async def _tokenize_input() -> None:
            """Feed pushed text into one tokenizer stream per segment."""
            word_stream = None
            async for item in self._input_ch:
                if isinstance(item, str):
                    if word_stream is None:
                        # First text of a new segment: open its tokenizer stream
                        # and hand it to the segment processor.
                        word_stream = self._tts._opts.word_tokenizer.stream()
                        self._segments_ch.send_nowait(word_stream)

                    word_stream.push_text(item)
                elif isinstance(item, self._FlushSentinel):
                    # A flush ends the current segment.
                    if word_stream is not None:
                        word_stream.end_input()
                    word_stream = None

            if word_stream is not None:
                word_stream.end_input()

            self._segments_ch.close()

        async def _process_segments() -> None:
            """Synthesize segments sequentially, in arrival order."""
            async for word_stream in self._segments_ch:
                await self._run_segment(word_stream, output_emitter)

        tasks = [
            asyncio.create_task(_tokenize_input()),
            asyncio.create_task(_process_segments()),
        ]

        try:
            await asyncio.gather(*tasks)
        except asyncio.TimeoutError:
            raise APITimeoutError() from None
        except APIError:
            # Already an API error (including timeouts raised by a segment);
            # re-wrapping it would lose its type and message.
            raise
        except Exception as e:
            raise APIConnectionError() from e
        finally:
            await utils.aio.gracefully_cancel(*tasks)

    async def _run_segment(
        self,
        word_stream: tokenize.WordStream | tokenize.SentenceStream,
        output_emitter: tts.AudioEmitter,
    ) -> None:
        """Synthesize one segment and push its audio to the emitter.

        Nothing is emitted for a segment that produced no tokens.

        Raises:
            APITimeoutError: If no audio chunk arrives within 30 seconds.
            APIError: For any other failure while processing the segment.
        """
        request_id = str(uuid.uuid4())
        client: WebSocketClient | None = None

        try:
            # Collect the segment's text first, so that a segment is only
            # opened on the emitter when there is something to synthesize.
            text_parts = [data.token async for data in word_stream]

            if not text_parts:
                return

            # Format text
            if isinstance(self._tts._opts.word_tokenizer, tokenize.WordTokenizer):
                full_text = self._tts._opts.word_tokenizer.format_words(text_parts)
            else:
                full_text = " ".join(text_parts)

            # Create the shared client if no stream has done so yet
            if not self._tts._client:
                self._tts._client = WebSocketClient(self._tts._opts)
            client = self._tts._client

            output_emitter.start_segment(segment_id=utils.shortuuid())
            self._mark_started()

            # Synthesize
            audio_queue = await client.synthesize(full_text, request_id)

            # Stream audio until the None end marker; a stalled stream raises
            # asyncio.TimeoutError, handled below.
            while True:
                audio_data = await asyncio.wait_for(audio_queue.get(), timeout=30.0)
                if audio_data is None:
                    break
                output_emitter.push(audio_data)

            # Close only this segment. end_input() would end the emitter's
            # input as a whole, and later segments of the same stream still
            # need to push audio.
            output_emitter.end_segment()

        except asyncio.TimeoutError as e:
            logger.error("Segment synthesis error: timed out waiting for audio")
            raise APITimeoutError() from e
        except Exception as e:
            logger.error(f"Segment synthesis error: {e}")
            raise APIError(f"Segment synthesis failed: {str(e)}") from e
        finally:
            # Unregister the request on every exit path so the client does not
            # keep its queue after a timeout, error or cancellation.
            if client is not None:
                client.audio_callbacks.pop(request_id, None)
                client.active_requests.pop(request_id, None)
@@ -0,0 +1,15 @@
1
+ # Copyright 2025 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "1.2.7"
@@ -0,0 +1,39 @@
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "livekit-plugins-upliftai"
dynamic = ["version"]
description = "Agent Framework plugin for speech synthesis with the Uplift AI."
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.9.0"
authors = [{ name = "LiveKit", email = "hello@livekit.io" }]
keywords = ["webrtc", "realtime", "audio", "video", "livekit", "upliftai", "multilingual", "tts", "urdu"]
classifiers = [
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Topic :: Multimedia :: Sound/Audio",
    "Topic :: Multimedia :: Video",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3 :: Only",
]
dependencies = [
    "livekit-agents[codecs]>=1.2.7",
    "numpy>=1.26",
    # livekit/plugins/upliftai/tts.py imports `socketio` and uses
    # `socketio.AsyncClient`, so the Socket.IO client must be installed with
    # the plugin.
    "python-socketio[asyncio_client]>=5.0",
]

[project.urls]
Documentation = "https://docs.livekit.io"
Website = "https://livekit.io/"
Source = "https://github.com/livekit/agents"

[tool.hatch.version]
path = "livekit/plugins/upliftai/version.py"

[tool.hatch.build.targets.wheel]
packages = ["livekit"]

[tool.hatch.build.targets.sdist]
include = ["/livekit"]