livekit-plugins-cartesia 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19)
  1. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/PKG-INFO +2 -2
  2. livekit_plugins_cartesia-0.2.0/livekit/plugins/cartesia/models.py +14 -0
  3. livekit_plugins_cartesia-0.2.0/livekit/plugins/cartesia/tts.py +139 -0
  4. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit/plugins/cartesia/version.py +1 -1
  5. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit_plugins_cartesia.egg-info/PKG-INFO +2 -2
  6. livekit_plugins_cartesia-0.2.0/livekit_plugins_cartesia.egg-info/requires.txt +1 -0
  7. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/setup.py +1 -3
  8. livekit_plugins_cartesia-0.1.1/livekit/plugins/cartesia/models.py +0 -47
  9. livekit_plugins_cartesia-0.1.1/livekit/plugins/cartesia/tts.py +0 -180
  10. livekit_plugins_cartesia-0.1.1/livekit_plugins_cartesia.egg-info/requires.txt +0 -1
  11. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/README.md +0 -0
  12. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit/plugins/cartesia/__init__.py +0 -0
  13. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit/plugins/cartesia/log.py +0 -0
  14. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit/plugins/cartesia/py.typed +0 -0
  15. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit_plugins_cartesia.egg-info/SOURCES.txt +0 -0
  16. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit_plugins_cartesia.egg-info/dependency_links.txt +0 -0
  17. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/livekit_plugins_cartesia.egg-info/top_level.txt +0 -0
  18. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/pyproject.toml +0 -0
  19. {livekit_plugins_cartesia-0.1.1 → livekit_plugins_cartesia-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-cartesia
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents~=0.7
22
+ Requires-Dist: livekit-agents>=0.8.0.dev0
23
23
 
24
24
  # LiveKit Plugins Cartesia
25
25
 
@@ -0,0 +1,14 @@
1
+ from typing import Literal
2
+
3
+ TTSEncoding = Literal[
4
+ "pcm_s16le",
5
+ # Not yet supported
6
+ # "pcm_f32le",
7
+ # "pcm_mulaw",
8
+ # "pcm_alaw",
9
+ ]
10
+
11
+
12
+ TTSModels = Literal["sonic-english", "sonic-multilingual"]
13
+ TTSLanguages = Literal["en", "es", "fr", "de", "pt", "zh", "ja"]
14
+ TTSDefaultVoiceId = "248be419-c632-4f23-adf1-5324ed7dbf1d"
@@ -0,0 +1,139 @@
1
+ # Copyright 2023 LiveKit, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import os
18
+ from dataclasses import dataclass
19
+
20
+ import aiohttp
21
+ from livekit.agents import tts, utils
22
+
23
+ from .log import logger
24
+ from .models import TTSDefaultVoiceId, TTSEncoding, TTSModels
25
+
26
+ API_AUTH_HEADER = "X-API-Key"
27
+ API_VERSION_HEADER = "Cartesia-Version"
28
+ API_VERSION = "2024-06-10"
29
+
30
+
31
+ @dataclass
32
+ class _TTSOptions:
33
+ model: TTSModels
34
+ encoding: TTSEncoding
35
+ sample_rate: int
36
+ voice: str | list[float]
37
+ api_key: str
38
+ language: str
39
+
40
+
41
+ class TTS(tts.TTS):
42
+ def __init__(
43
+ self,
44
+ *,
45
+ model: TTSModels = "sonic-english",
46
+ language: str = "en",
47
+ encoding: TTSEncoding = "pcm_s16le",
48
+ voice: str | list[float] = TTSDefaultVoiceId,
49
+ sample_rate: int = 24000,
50
+ api_key: str | None = None,
51
+ http_session: aiohttp.ClientSession | None = None,
52
+ ) -> None:
53
+ super().__init__(
54
+ capabilities=tts.TTSCapabilities(streaming=False),
55
+ sample_rate=sample_rate,
56
+ num_channels=1,
57
+ )
58
+
59
+ api_key = api_key or os.environ.get("CARTESIA_API_KEY")
60
+ if not api_key:
61
+ raise ValueError("CARTESIA_API_KEY must be set")
62
+
63
+ self._opts = _TTSOptions(
64
+ model=model,
65
+ language=language,
66
+ encoding=encoding,
67
+ sample_rate=sample_rate,
68
+ voice=voice,
69
+ api_key=api_key,
70
+ )
71
+ self._session = http_session
72
+
73
+ def _ensure_session(self) -> aiohttp.ClientSession:
74
+ if not self._session:
75
+ self._session = utils.http_context.http_session()
76
+
77
+ return self._session
78
+
79
+ def synthesize(self, text: str) -> "ChunkedStream":
80
+ return ChunkedStream(text, self._opts, self._ensure_session())
81
+
82
+
83
+ class ChunkedStream(tts.ChunkedStream):
84
+ def __init__(
85
+ self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
86
+ ) -> None:
87
+ super().__init__()
88
+ self._text, self._opts, self._session = text, opts, session
89
+
90
+ @utils.log_exceptions(logger=logger)
91
+ async def _main_task(self):
92
+ bstream = utils.audio.AudioByteStream(
93
+ sample_rate=self._opts.sample_rate, num_channels=1
94
+ )
95
+ request_id = utils.shortuuid()
96
+ segment_id = utils.shortuuid()
97
+
98
+ voice = {}
99
+ if isinstance(self._opts.voice, str):
100
+ voice["mode"] = "id"
101
+ voice["id"] = self._opts.voice
102
+ else:
103
+ voice["mode"] = "embedding"
104
+ voice["embedding"] = self._opts.voice
105
+
106
+ data = {
107
+ "model_id": self._opts.model,
108
+ "transcript": self._text,
109
+ "voice": voice,
110
+ "output_format": {
111
+ "container": "raw",
112
+ "encoding": self._opts.encoding,
113
+ "sample_rate": self._opts.sample_rate,
114
+ },
115
+ "language": self._opts.language,
116
+ }
117
+
118
+ async with self._session.post(
119
+ "https://api.cartesia.ai/tts/bytes",
120
+ headers={
121
+ API_AUTH_HEADER: f"{self._opts.api_key}",
122
+ API_VERSION_HEADER: API_VERSION,
123
+ },
124
+ json=data,
125
+ ) as resp:
126
+ async for data, _ in resp.content.iter_chunks():
127
+ for frame in bstream.write(data):
128
+ self._event_ch.send_nowait(
129
+ tts.SynthesizedAudio(
130
+ request_id=request_id, segment_id=segment_id, frame=frame
131
+ )
132
+ )
133
+
134
+ for frame in bstream.flush():
135
+ self._event_ch.send_nowait(
136
+ tts.SynthesizedAudio(
137
+ request_id=request_id, segment_id=segment_id, frame=frame
138
+ )
139
+ )
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "0.1.1"
15
+ __version__ = "0.2.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: livekit-plugins-cartesia
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: LiveKit Agents Plugin for Cartesia
5
5
  Home-page: https://github.com/livekit/agents
6
6
  License: Apache-2.0
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
19
19
  Classifier: Programming Language :: Python :: 3 :: Only
20
20
  Requires-Python: >=3.9.0
21
21
  Description-Content-Type: text/markdown
22
- Requires-Dist: livekit-agents~=0.7
22
+ Requires-Dist: livekit-agents>=0.8.0.dev0
23
23
 
24
24
  # LiveKit Plugins Cartesia
25
25
 
@@ -0,0 +1 @@
1
+ livekit-agents>=0.8.0.dev0
@@ -47,9 +47,7 @@ setuptools.setup(
47
47
  license="Apache-2.0",
48
48
  packages=setuptools.find_namespace_packages(include=["livekit.*"]),
49
49
  python_requires=">=3.9.0",
50
- install_requires=[
51
- "livekit-agents~=0.7",
52
- ],
50
+ install_requires=["livekit-agents>=0.8.0.dev0"],
53
51
  project_urls={
54
52
  "Documentation": "https://docs.livekit.io",
55
53
  "Website": "https://livekit.io/",
@@ -1,47 +0,0 @@
1
- from typing import Literal
2
-
3
- TTSEncoding = Literal[
4
- "pcm_s16le",
5
- # Not yet supported
6
- # "pcm_f32le",
7
- # "pcm_mulaw",
8
- # "pcm_alaw",
9
- ]
10
-
11
-
12
- TTSModels = Literal["upbeat-moon"]
13
-
14
-
15
- # fmt: off
16
- # Barbershop Man in upbeat-moon
17
- TTSDefaultVoiceEmbedding: list[float] = [
18
- -0.033633083, 0.072083704, -0.01807767, -0.083488315, -0.04407617, 0.0022592682, 0.070505895,
19
- 0.023946615, -0.04788024, -0.06388413, -0.0716355, -0.0022612812, -0.0053448505, -0.07848381,
20
- 0.0348162, -0.053745482, -0.092399485, -0.02950225, 0.028591828, -0.10556894, 0.023313355,
21
- 0.06224387, 0.0362463, 0.029258432, 0.10769641, 0.043595582, -0.058543224, -0.080402784,
22
- -0.0953816, -0.008988032, -0.0028981369, -0.004752721, -0.20742874, 0.058907595, 0.08813939,
23
- -0.06192675, 0.099082634, -0.09661578, -0.0077761724, -0.013982456, -0.025798267, 0.04467142,
24
- 0.026222011, 0.023023574, 0.011227064, -0.17462021, -0.09880612, -0.1521035, -0.060464993,
25
- -0.04735665, -0.09725187, -0.006127679, 0.15818526, -0.039493002, -0.067719474, 0.0066190436,
26
- -0.10636633, 0.17073768, -0.051717706, 0.03186961, -0.020547207, -0.02244247, 0.013196935,
27
- -0.06431055, -0.115360335, 0.016918058, -0.033195216, 0.11255181, 0.020366343, -0.041032124,
28
- 0.08780918, -0.040567942, 0.057276532, 0.05848221, -0.077479474, -0.073524915, -0.01913317,
29
- -0.029291833, 0.11210393, -0.09859328, 0.2152541, -0.022976823, 0.028627992, -0.039598297,
30
- 0.041829932, -0.05593181, -0.06444655, -0.018057477, -0.008098263, 0.05994528, 0.10430693,
31
- -0.13121894, -0.06512868, -0.026126215, 0.046727825, -0.17180993, -0.10577226, -0.08610466,
32
- 0.008862588, 0.09547498, -0.010965332, -0.061217085, -0.038954042, 0.019930292, -0.017192135,
33
- 0.007296275, 0.03273872, 0.04389937, -0.056483064, 0.003420891, -0.10319067, -0.015706042,
34
- 0.1308774, -0.0018035866, -0.03582506, 0.077131025, 0.013398928, 0.003188886, 0.12039741,
35
- -0.033974767, 0.06899378, -0.059775922, -0.026934423, 0.028482193, 0.100996524, 0.004498743,
36
- -0.02291186, 0.078752205, -0.0063796206, 0.04206536, 0.05721349, 0.06290694, 0.06130212,
37
- 0.096969016, -0.057664312, -0.16727506, -0.035220966, 0.090760484, 0.010039947, 0.06513242,
38
- 0.011055657, -0.004258431, -0.08316792, -0.15650468, -0.076931365, 0.11385587, -0.038372636,
39
- 0.015648656, -0.12029895, -0.06604956, 0.009441591, -0.11912808, 0.013378132, 0.029525978,
40
- -0.0056742397, -0.0075976513, 0.019999338, -0.05521377, -0.07650746, -0.017710293, -0.033986397,
41
- -0.047768556, 0.13857274, 0.099290825, 0.11736938, 0.017834296, -0.07140237, -0.052047748,
42
- -0.06398965, -0.037033975, -0.061061256, -0.03330076, -0.024472248, -0.059656, 0.05359946,
43
- -0.043915518, -0.086325996, 0.14189173, 0.021086395, 0.02945159, 0.1029604, 0.018490415,
44
- -0.028736332, -0.025272416, -0.06082937, -0.031339463, -0.0007249595, 0.025595888, 0.007144545,
45
- -0.16938712, -0.1160664, -0.0654145,
46
- ]
47
- # fmt: on
@@ -1,180 +0,0 @@
1
- # Copyright 2023 LiveKit, Inc.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- from __future__ import annotations
16
-
17
- import asyncio
18
- import contextlib
19
- import os
20
- from dataclasses import dataclass
21
- from typing import Optional
22
-
23
- import aiohttp
24
- from livekit import rtc
25
- from livekit.agents import tts, utils
26
-
27
- from .log import logger
28
- from .models import TTSDefaultVoiceEmbedding, TTSEncoding, TTSModels
29
-
30
- API_AUTH_HEADER = "X-API-Key"
31
- API_VERSION_HEADER = "Cartesia-Version"
32
- API_VERSION = "2024-06-10"
33
-
34
-
35
- @dataclass
36
- class _TTSOptions:
37
- model: TTSModels
38
- encoding: TTSEncoding
39
- sample_rate: int
40
- voice: str | list[float]
41
- api_key: str
42
-
43
-
44
- class TTS(tts.TTS):
45
- def __init__(
46
- self,
47
- *,
48
- model: TTSModels = "upbeat-moon",
49
- encoding: TTSEncoding = "pcm_s16le",
50
- voice: str | list[float] = TTSDefaultVoiceEmbedding,
51
- sample_rate: int = 24000,
52
- api_key: str | None = None,
53
- http_session: aiohttp.ClientSession | None = None,
54
- ) -> None:
55
- super().__init__(
56
- streaming_supported=False,
57
- sample_rate=sample_rate,
58
- num_channels=1,
59
- )
60
-
61
- api_key = api_key or os.environ.get("CARTESIA_API_KEY")
62
- if not api_key:
63
- raise ValueError("CARTESIA_API_KEY must be set")
64
-
65
- self._opts = _TTSOptions(
66
- model=model,
67
- encoding=encoding,
68
- sample_rate=sample_rate,
69
- voice=voice,
70
- api_key=api_key,
71
- )
72
- self._session = http_session
73
-
74
- def _ensure_session(self) -> aiohttp.ClientSession:
75
- if not self._session:
76
- self._session = utils.http_session()
77
-
78
- return self._session
79
-
80
- def synthesize(
81
- self,
82
- text: str,
83
- ) -> "ChunkedStream":
84
- return ChunkedStream(text, self._opts, self._ensure_session())
85
-
86
-
87
- class ChunkedStream(tts.ChunkedStream):
88
- def __init__(
89
- self, text: str, opts: _TTSOptions, session: aiohttp.ClientSession
90
- ) -> None:
91
- self._opts = opts
92
- self._text = text
93
- self._session = session
94
- self._main_task: asyncio.Task | None = None
95
- self._queue = asyncio.Queue[Optional[tts.SynthesizedAudio]]()
96
-
97
- @utils.log_exceptions(logger=logger)
98
- async def _run(self):
99
- voice = {}
100
- if isinstance(self._opts.voice, str):
101
- voice["mode"] = "id"
102
- voice["id"] = self._opts.voice
103
- else:
104
- voice["mode"] = "embedding"
105
- voice["embedding"] = self._opts.voice
106
-
107
- try:
108
- async with self._session.post(
109
- "https://api.cartesia.ai/tts/bytes",
110
- headers={
111
- API_AUTH_HEADER: f"{self._opts.api_key}",
112
- API_VERSION_HEADER: API_VERSION,
113
- },
114
- json={
115
- "model_id": self._opts.model,
116
- "transcript": self._text,
117
- "voice": voice,
118
- "output_format": {
119
- "container": "raw",
120
- "encoding": self._opts.encoding,
121
- "sample_rate": self._opts.sample_rate,
122
- },
123
- },
124
- ) as resp:
125
- bytes_per_frame = (self._opts.sample_rate // 100) * 2
126
- buf = bytearray()
127
-
128
- async for data, _ in resp.content.iter_chunks():
129
- buf.extend(data)
130
-
131
- while len(buf) >= bytes_per_frame:
132
- frame_data = buf[:bytes_per_frame]
133
- buf = buf[bytes_per_frame:]
134
-
135
- self._queue.put_nowait(
136
- tts.SynthesizedAudio(
137
- text=self._text,
138
- data=rtc.AudioFrame(
139
- data=frame_data,
140
- sample_rate=self._opts.sample_rate,
141
- num_channels=1,
142
- samples_per_channel=len(frame_data) // 2,
143
- ),
144
- )
145
- )
146
-
147
- # send any remaining data
148
- if len(buf) > 0:
149
- self._queue.put_nowait(
150
- tts.SynthesizedAudio(
151
- text=self._text,
152
- data=rtc.AudioFrame(
153
- data=buf,
154
- sample_rate=self._opts.sample_rate,
155
- num_channels=1,
156
- samples_per_channel=len(buf) // 2,
157
- ),
158
- )
159
- )
160
-
161
- finally:
162
- self._queue.put_nowait(None)
163
-
164
- async def __anext__(self) -> tts.SynthesizedAudio:
165
- if not self._main_task:
166
- self._main_task = asyncio.create_task(self._run())
167
-
168
- frame = await self._queue.get()
169
- if frame is None:
170
- raise StopAsyncIteration
171
-
172
- return frame
173
-
174
- async def aclose(self) -> None:
175
- if not self._main_task:
176
- return
177
-
178
- self._main_task.cancel()
179
- with contextlib.suppress(asyncio.CancelledError):
180
- await self._main_task