livekit-plugins-cartesia 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .tts import TTS, ChunkedStream
16
+ from .version import __version__
17
+
18
+ __all__ = ["TTS", "ChunkedStream", "__version__"]
19
+
20
+ from livekit.agents import Plugin
21
+
22
+
23
class CartesiaPlugin(Plugin):
    """Plugin descriptor for the Cartesia integration.

    The descriptor identifies itself to the ``Plugin`` base class with this
    module's name, the package version and the package name.
    """

    def __init__(self):
        """Forward this module's identity to the ``Plugin`` base class."""
        super().__init__(__name__, __version__, __package__)

    def download_files(self):
        """Do nothing: this plugin has no files to download."""
        return None


# Importing the package registers a single plugin instance as a side effect.
Plugin.register_plugin(CartesiaPlugin())
@@ -0,0 +1,3 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger("livekit.plugins.cartesia")
@@ -0,0 +1,47 @@
1
+ from typing import Literal
2
+
3
# Model identifiers accepted for the ``model`` option.
TTSModels = Literal["upbeat-moon"]


# Output sample encodings accepted for the ``encoding`` option. Only signed
# 16-bit little-endian PCM is enabled; the commented-out entries are kept as
# a record of encodings that are not supported yet.
TTSEncoding = Literal[
    "pcm_s16le",
    # Not yet supported
    # "pcm_f32le",
    # "pcm_mulaw",
    # "pcm_alaw",
]
13
+
14
+
15
# fmt: off
# Barbershop Man in upbeat-moon
#
# Default voice, expressed as a 192-value embedding.
#
# The annotation is quoted on purpose: a module-level annotation is evaluated
# at import time, and subscripting the built-in ``list`` raises TypeError on
# Python 3.8, which the package metadata still declares as supported.
TTSDefaultVoiceEmbedding: "list[float]" = [
    -0.033633083, 0.072083704, -0.01807767, -0.083488315, -0.04407617, 0.0022592682, 0.070505895,
    0.023946615, -0.04788024, -0.06388413, -0.0716355, -0.0022612812, -0.0053448505, -0.07848381,
    0.0348162, -0.053745482, -0.092399485, -0.02950225, 0.028591828, -0.10556894, 0.023313355,
    0.06224387, 0.0362463, 0.029258432, 0.10769641, 0.043595582, -0.058543224, -0.080402784,
    -0.0953816, -0.008988032, -0.0028981369, -0.004752721, -0.20742874, 0.058907595, 0.08813939,
    -0.06192675, 0.099082634, -0.09661578, -0.0077761724, -0.013982456, -0.025798267, 0.04467142,
    0.026222011, 0.023023574, 0.011227064, -0.17462021, -0.09880612, -0.1521035, -0.060464993,
    -0.04735665, -0.09725187, -0.006127679, 0.15818526, -0.039493002, -0.067719474, 0.0066190436,
    -0.10636633, 0.17073768, -0.051717706, 0.03186961, -0.020547207, -0.02244247, 0.013196935,
    -0.06431055, -0.115360335, 0.016918058, -0.033195216, 0.11255181, 0.020366343, -0.041032124,
    0.08780918, -0.040567942, 0.057276532, 0.05848221, -0.077479474, -0.073524915, -0.01913317,
    -0.029291833, 0.11210393, -0.09859328, 0.2152541, -0.022976823, 0.028627992, -0.039598297,
    0.041829932, -0.05593181, -0.06444655, -0.018057477, -0.008098263, 0.05994528, 0.10430693,
    -0.13121894, -0.06512868, -0.026126215, 0.046727825, -0.17180993, -0.10577226, -0.08610466,
    0.008862588, 0.09547498, -0.010965332, -0.061217085, -0.038954042, 0.019930292, -0.017192135,
    0.007296275, 0.03273872, 0.04389937, -0.056483064, 0.003420891, -0.10319067, -0.015706042,
    0.1308774, -0.0018035866, -0.03582506, 0.077131025, 0.013398928, 0.003188886, 0.12039741,
    -0.033974767, 0.06899378, -0.059775922, -0.026934423, 0.028482193, 0.100996524, 0.004498743,
    -0.02291186, 0.078752205, -0.0063796206, 0.04206536, 0.05721349, 0.06290694, 0.06130212,
    0.096969016, -0.057664312, -0.16727506, -0.035220966, 0.090760484, 0.010039947, 0.06513242,
    0.011055657, -0.004258431, -0.08316792, -0.15650468, -0.076931365, 0.11385587, -0.038372636,
    0.015648656, -0.12029895, -0.06604956, 0.009441591, -0.11912808, 0.013378132, 0.029525978,
    -0.0056742397, -0.0075976513, 0.019999338, -0.05521377, -0.07650746, -0.017710293, -0.033986397,
    -0.047768556, 0.13857274, 0.099290825, 0.11736938, 0.017834296, -0.07140237, -0.052047748,
    -0.06398965, -0.037033975, -0.061061256, -0.03330076, -0.024472248, -0.059656, 0.05359946,
    -0.043915518, -0.086325996, 0.14189173, 0.021086395, 0.02945159, 0.1029604, 0.018490415,
    -0.028736332, -0.025272416, -0.06082937, -0.031339463, -0.0007249595, 0.025595888, 0.007144545,
    -0.16938712, -0.1160664, -0.0654145,
]
# fmt: on
File without changes
@@ -0,0 +1,180 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
from __future__ import annotations

import asyncio
import contextlib
import os
from dataclasses import dataclass, field
from typing import Optional

import aiohttp
from livekit import rtc
from livekit.agents import tts, utils

from .log import logger
from .models import TTSDefaultVoiceEmbedding, TTSEncoding, TTSModels
29
+
30
+ API_AUTH_HEADER = "X-API-Key"
31
+ API_VERSION_HEADER = "Cartesia-Version"
32
+ API_VERSION = "2024-06-10"
33
+
34
+
35
+ @dataclass
36
+ class _TTSOptions:
37
+ model: TTSModels
38
+ encoding: TTSEncoding
39
+ sample_rate: int
40
+ voice: str | list[float]
41
+ api_key: str
42
+
43
+
44
class TTS(tts.TTS):
    """Cartesia text-to-speech engine.

    The engine is registered with the base class as non-streaming, mono
    audio. Each ``synthesize`` call hands the text to a ``ChunkedStream``.
    """

    def __init__(
        self,
        *,
        model: TTSModels = "upbeat-moon",
        encoding: TTSEncoding = "pcm_s16le",
        voice: str | list[float] = TTSDefaultVoiceEmbedding,
        sample_rate: int = 24000,
        api_key: str | None = None,
        http_session: aiohttp.ClientSession | None = None,
    ) -> None:
        """Store the synthesis options and resolve the API key.

        Args:
            model: Cartesia model identifier.
            encoding: Output sample encoding.
            voice: Either a voice id (``str``) or a voice embedding
                (list of floats).
            sample_rate: Output sample rate, also reported to the base class.
            api_key: Cartesia API key. When empty or omitted, the
                ``CARTESIA_API_KEY`` environment variable is used instead.
            http_session: Session to issue requests with. When omitted, one
                is obtained from ``utils.http_session()`` on first use.

        Raises:
            ValueError: If no API key is given and the environment variable
                is unset or empty.
        """
        super().__init__(
            streaming_supported=False,
            sample_rate=sample_rate,
            num_channels=1,
        )

        resolved_key = api_key if api_key else os.environ.get("CARTESIA_API_KEY")
        if not resolved_key:
            raise ValueError("CARTESIA_API_KEY must be set")

        self._session = http_session
        self._opts = _TTSOptions(
            model=model,
            encoding=encoding,
            sample_rate=sample_rate,
            voice=voice,
            api_key=resolved_key,
        )

    def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the HTTP session, obtaining and caching one if none is set."""
        self._session = self._session or utils.http_session()
        return self._session

    def synthesize(
        self,
        text: str,
    ) -> "ChunkedStream":
        """Return a ``ChunkedStream`` that synthesizes ``text`` with the stored options."""
        session = self._ensure_session()
        return ChunkedStream(text, self._opts, session)
85
+
86
+
87
class ChunkedStream(tts.ChunkedStream):
    """Async iterator over the audio frames of one Cartesia synthesis request.

    The HTTP request is started lazily, on the first ``__anext__`` call. The
    response body is cut into fixed-size frames that are handed to the
    consumer through an internal queue; a ``None`` entry marks the end.
    """

    def __init__(
        self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
    ) -> None:
        """Remember the request parameters; no I/O happens here.

        Args:
            text: Transcript to synthesize.
            opts: Model, voice, encoding, sample rate and API key to use.
            session: HTTP session used to issue the request.
        """
        self._opts = opts
        self._text = text
        self._session = session
        self._main_task: asyncio.Task | None = None
        # Annotate instead of subscripting at runtime: evaluating
        # ``asyncio.Queue[...]`` as an expression raises TypeError on
        # Python 3.8, which the package metadata still declares as supported.
        self._queue: asyncio.Queue[Optional[tts.SynthesizedAudio]] = asyncio.Queue()

    def _push_frame(self, pcm: bytearray) -> None:
        """Wrap ``pcm`` (mono, 2 bytes per sample) in a frame and enqueue it."""
        self._queue.put_nowait(
            tts.SynthesizedAudio(
                text=self._text,
                data=rtc.AudioFrame(
                    data=pcm,
                    sample_rate=self._opts.sample_rate,
                    num_channels=1,
                    samples_per_channel=len(pcm) // 2,
                ),
            )
        )

    @utils.log_exceptions(logger=logger)
    async def _run(self):
        """Issue the synthesis request and enqueue the response as audio frames.

        A ``None`` sentinel is always enqueued at the end, including on
        failure or cancellation, so that a consumer blocked in ``__anext__``
        is released.
        """
        # A string selects a voice by id; anything else is sent as an embedding.
        if isinstance(self._opts.voice, str):
            voice = {"mode": "id", "id": self._opts.voice}
        else:
            voice = {"mode": "embedding", "embedding": self._opts.voice}

        # One frame holds sample_rate // 100 samples (10 ms) at 2 bytes each.
        # Never go below one sample, otherwise the slicing loop below would
        # spin forever on a zero-length frame.
        bytes_per_frame = max(2, (self._opts.sample_rate // 100) * 2)
        buf = bytearray()

        try:
            async with self._session.post(
                "https://api.cartesia.ai/tts/bytes",
                headers={
                    API_AUTH_HEADER: f"{self._opts.api_key}",
                    API_VERSION_HEADER: API_VERSION,
                },
                json={
                    "model_id": self._opts.model,
                    "transcript": self._text,
                    "voice": voice,
                    "output_format": {
                        "container": "raw",
                        "encoding": self._opts.encoding,
                        "sample_rate": self._opts.sample_rate,
                    },
                },
            ) as resp:
                # Without this check an error response (4xx/5xx) would have
                # its body sliced up and emitted as if it were PCM audio.
                resp.raise_for_status()

                async for data, _ in resp.content.iter_chunks():
                    buf.extend(data)

                    while len(buf) >= bytes_per_frame:
                        # The slice is a copy, so the frame keeps its own
                        # bytes; deleting in place avoids re-copying the
                        # whole remainder of the buffer for every frame.
                        self._push_frame(buf[:bytes_per_frame])
                        del buf[:bytes_per_frame]

                # send any remaining data
                if buf:
                    self._push_frame(buf)

        finally:
            self._queue.put_nowait(None)

    async def __anext__(self) -> tts.SynthesizedAudio:
        """Return the next synthesized frame, starting the request on first use.

        Raises:
            StopAsyncIteration: When the end-of-stream sentinel is reached.
        """
        if not self._main_task:
            self._main_task = asyncio.create_task(self._run())

        frame = await self._queue.get()
        if frame is None:
            raise StopAsyncIteration

        return frame

    async def aclose(self) -> None:
        """Cancel the background request, if one was started, and wait for it."""
        if not self._main_task:
            return

        self._main_task.cancel()
        with contextlib.suppress(asyncio.CancelledError):
            await self._main_task
@@ -0,0 +1,15 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "0.1.0"
@@ -0,0 +1,38 @@
1
+ Metadata-Version: 2.1
2
+ Name: livekit-plugins-cartesia
3
+ Version: 0.1.0
4
+ Summary: LiveKit Agents Plugin for Cartesia
5
+ Home-page: https://github.com/livekit/agents
6
+ License: Apache-2.0
7
+ Project-URL: Documentation, https://docs.livekit.io
8
+ Project-URL: Website, https://livekit.io/
9
+ Project-URL: Source, https://github.com/livekit/agents
10
+ Keywords: webrtc,realtime,audio,video,livekit
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Topic :: Multimedia :: Sound/Audio
14
+ Classifier: Topic :: Multimedia :: Video
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3 :: Only
21
+ Requires-Python: >=3.8.0
22
+ Description-Content-Type: text/markdown
23
+ Requires-Dist: livekit ~=0.11
24
+ Requires-Dist: livekit-agents ~=0.7
25
+
26
+ # LiveKit Plugins Cartesia
27
+
28
+ Agent Framework plugin for voice synthesis with the [Cartesia](https://cartesia.ai/) API.
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ pip install livekit-plugins-cartesia
34
+ ```
35
+
36
+ ## Pre-requisites
37
+
38
+ You'll need an API key from Cartesia. Pass it to the `TTS` constructor as `api_key`, or set it in the `CARTESIA_API_KEY` environment variable.
@@ -0,0 +1,10 @@
1
+ livekit/plugins/cartesia/__init__.py,sha256=_a8u7qqya1pjZTV19gNOpMKTO7ccAVZAeCukiDKAG-U,937
2
+ livekit/plugins/cartesia/log.py,sha256=4Mnhjng_DU1dIWP9IWjIQGZ67EV3LnQhWMWCHVudJbo,71
3
+ livekit/plugins/cartesia/models.py,sha256=Qhl51ZScuB61bEzN1tBlHMuHO_kCXSzuVOicYa16EL8,2922
4
+ livekit/plugins/cartesia/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ livekit/plugins/cartesia/tts.py,sha256=16BneZFQQsS-lB9Ug1HYj4QW7-VnNdpTJ0CW5A1b9EU,5725
6
+ livekit/plugins/cartesia/version.py,sha256=vQH9cItKAVYAmrLbOntkbLqmxrUZrPiKb1TjkZ8jRKQ,600
7
+ livekit_plugins_cartesia-0.1.0.dist-info/METADATA,sha256=lpj60lrnFHBzooiT3f3Dtt74J0vq9ZvHfxqCb7YCBmg,1325
8
+ livekit_plugins_cartesia-0.1.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
9
+ livekit_plugins_cartesia-0.1.0.dist-info/top_level.txt,sha256=OoDok3xUmXbZRvOrfvvXB-Juu4DX79dlq188E19YHoo,8
10
+ livekit_plugins_cartesia-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.43.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ livekit