meshagent-livekit 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-livekit might be problematic. Click here for more details.
- meshagent/livekit/version.py +1 -1
- {meshagent_livekit-0.0.29.dist-info → meshagent_livekit-0.0.31.dist-info}/METADATA +3 -6
- {meshagent_livekit-0.0.29.dist-info → meshagent_livekit-0.0.31.dist-info}/RECORD +6 -7
- meshagent/livekit/tools/speech.py +0 -260
- {meshagent_livekit-0.0.29.dist-info → meshagent_livekit-0.0.31.dist-info}/WHEEL +0 -0
- {meshagent_livekit-0.0.29.dist-info → meshagent_livekit-0.0.31.dist-info}/licenses/LICENSE +0 -0
- {meshagent_livekit-0.0.29.dist-info → meshagent_livekit-0.0.31.dist-info}/top_level.txt +0 -0
meshagent/livekit/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.29"
|
|
1
|
+
__version__ = "0.0.31"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: meshagent-livekit
|
|
3
|
-
Version: 0.0.29
|
|
3
|
+
Version: 0.0.31
|
|
4
4
|
Summary: Livekit support for Meshagent
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
Project-URL: Documentation, https://docs.meshagent.com
|
|
@@ -15,13 +15,10 @@ Requires-Dist: strip-markdown~=1.3
|
|
|
15
15
|
Requires-Dist: livekit-api~=1.0.2
|
|
16
16
|
Requires-Dist: livekit-agents~=1.0.19
|
|
17
17
|
Requires-Dist: livekit-plugins-openai~=1.0.19
|
|
18
|
-
Requires-Dist: livekit-plugins-cartesia~=1.0.19
|
|
19
|
-
Requires-Dist: livekit-plugins-elevenlabs~=1.0.19
|
|
20
|
-
Requires-Dist: livekit-plugins-playai~=1.0.19
|
|
21
18
|
Requires-Dist: livekit-plugins-silero~=1.0.19
|
|
22
19
|
Requires-Dist: livekit-plugins-turn-detector~=1.0.19
|
|
23
|
-
Requires-Dist: meshagent-api~=0.0.29
|
|
24
|
-
Requires-Dist: meshagent-tools~=0.0.29
|
|
20
|
+
Requires-Dist: meshagent-api~=0.0.31
|
|
21
|
+
Requires-Dist: meshagent-tools~=0.0.31
|
|
25
22
|
Dynamic: license-file
|
|
26
23
|
|
|
27
24
|
### Meshagent LiveKit
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
meshagent/livekit/__init__.py,sha256=8zLGg-DfQhnDl2Ky0n-zXpN-8e-g7iR0AcaI4l4Vvpk,32
|
|
2
2
|
meshagent/livekit/livekit_protocol.py,sha256=K9yP-qpxag5_7TXlKjFEx3cOJJJpYI_z6zGzFHoN1Hs,1421
|
|
3
3
|
meshagent/livekit/livekit_protocol_test.py,sha256=n_ZQjt7n4u7TM7eENzH8L0tw8LvypS_JHF_PuJ2o6h4,2836
|
|
4
|
-
meshagent/livekit/version.py,sha256=
|
|
4
|
+
meshagent/livekit/version.py,sha256=JkmidIUbQFwwAhFKnz2l-8dxIwGERIblSCC4CXcpX08,22
|
|
5
5
|
meshagent/livekit/agents/transcriber.py,sha256=Dq1Ijx4gmA-0jQGM-f3w7X-JIZpkRCFDxWae9AOwz-k,12290
|
|
6
6
|
meshagent/livekit/agents/voice.py,sha256=ZAjcIsiTRXYm77Gsv6pMVScI6ozYE-8loGgW8ZQ9N8E,12520
|
|
7
|
-
|
|
8
|
-
meshagent_livekit-0.0.
|
|
9
|
-
meshagent_livekit-0.0.
|
|
10
|
-
meshagent_livekit-0.0.
|
|
11
|
-
meshagent_livekit-0.0.
|
|
12
|
-
meshagent_livekit-0.0.29.dist-info/RECORD,,
|
|
7
|
+
meshagent_livekit-0.0.31.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
8
|
+
meshagent_livekit-0.0.31.dist-info/METADATA,sha256=cH5yiqR2mqU4Gv6IN_tCxxp4_6XmcrGwaB98PPRgBwM,808
|
|
9
|
+
meshagent_livekit-0.0.31.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
meshagent_livekit-0.0.31.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
11
|
+
meshagent_livekit-0.0.31.dist-info/RECORD,,
|
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
import uuid
|
|
2
|
-
from livekit import rtc
|
|
3
|
-
from livekit.agents.tts import TTS
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
from ..agents.voice import VoiceConnection
|
|
7
|
-
from meshagent.tools.toolkit import Toolkit, Tool, FileResponse, ToolContext, TextResponse
|
|
8
|
-
|
|
9
|
-
from livekit.plugins import elevenlabs
|
|
10
|
-
from livekit.plugins import cartesia
|
|
11
|
-
from livekit.plugins import openai
|
|
12
|
-
from livekit.plugins import playai
|
|
13
|
-
|
|
14
|
-
class SpeechTools(Toolkit):
|
|
15
|
-
def __init__(self):
|
|
16
|
-
super().__init__(
|
|
17
|
-
name="meshagent.speech",
|
|
18
|
-
title="voice",
|
|
19
|
-
description="speech to text tools",
|
|
20
|
-
tools=[
|
|
21
|
-
ElevenTextToSpeech(),
|
|
22
|
-
CartesiaTextToSpeech(),
|
|
23
|
-
OpenAITextToSpeech(),
|
|
24
|
-
PlayHTTextToSpeech(),
|
|
25
|
-
])
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def synthesize(tts: TTS, text:str):
|
|
29
|
-
|
|
30
|
-
frames = list[rtc.AudioFrame]()
|
|
31
|
-
stream = tts.synthesize(text=text)
|
|
32
|
-
try:
|
|
33
|
-
async for chunk in stream:
|
|
34
|
-
frame : rtc.AudioFrame = chunk.frame
|
|
35
|
-
frames.append(frame)
|
|
36
|
-
|
|
37
|
-
merged = rtc.combine_audio_frames(frames)
|
|
38
|
-
return FileResponse(data=merged.to_wav_bytes(), name= str(uuid.uuid4())+".wav", mime_type="audio/wav")
|
|
39
|
-
finally:
|
|
40
|
-
await stream.aclose()
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
class PlayHTTextToSpeech(Tool):
|
|
44
|
-
def __init__(self):
|
|
45
|
-
super().__init__(
|
|
46
|
-
name="playht_text_to_speech",
|
|
47
|
-
title="PlayHT text to speech",
|
|
48
|
-
description="generate an audio file, converting text to speech",
|
|
49
|
-
input_schema={
|
|
50
|
-
"type" : "object",
|
|
51
|
-
"properties" : {
|
|
52
|
-
"input_text" : {
|
|
53
|
-
"type" : "string",
|
|
54
|
-
"description" : "the text to convert to speech",
|
|
55
|
-
},
|
|
56
|
-
"model" : {
|
|
57
|
-
"type": "string",
|
|
58
|
-
"description" : "(default: PlayDialog)",
|
|
59
|
-
"enum" : [ "Play3.0-mini", "PlayDialog", "PlayHT2.0-turbo" ]
|
|
60
|
-
},
|
|
61
|
-
"sample_rate" : {
|
|
62
|
-
"type" : "number",
|
|
63
|
-
"description" : "(default: 48000)",
|
|
64
|
-
"enum" : [ 48000, 24000 ]
|
|
65
|
-
}
|
|
66
|
-
},
|
|
67
|
-
"required": ["input_text","model","sample_rate"],
|
|
68
|
-
"additionalProperties" : False,
|
|
69
|
-
})
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
async def execute(self, *, context: ToolContext, input_text: str, sample_rate: int, model: str):
|
|
74
|
-
tts = playai.TTS(
|
|
75
|
-
model=model,
|
|
76
|
-
sample_rate=sample_rate,
|
|
77
|
-
)
|
|
78
|
-
|
|
79
|
-
return await synthesize(tts, input_text)
|
|
80
|
-
|
|
81
|
-
class ElevenTextToSpeech(Tool):
|
|
82
|
-
def __init__(self):
|
|
83
|
-
super().__init__(
|
|
84
|
-
name="eleven_labs_text_to_speech",
|
|
85
|
-
title="ElevenLabs text to speech",
|
|
86
|
-
description="generate an audio file, converting text to speech",
|
|
87
|
-
input_schema={
|
|
88
|
-
"type" : "object",
|
|
89
|
-
"properties" : {
|
|
90
|
-
"input_text" : {
|
|
91
|
-
"type" : "string",
|
|
92
|
-
"description" : "the text to convert to speech",
|
|
93
|
-
},
|
|
94
|
-
"voice_id" : {
|
|
95
|
-
"type" : "string",
|
|
96
|
-
"description" : "the id of a voice to use (default: EXAVITQu4vr4xnSDxMaL)",
|
|
97
|
-
},
|
|
98
|
-
"voice_name" : {
|
|
99
|
-
"type" : "string",
|
|
100
|
-
"description" : "the name of the voice to use (optional)",
|
|
101
|
-
},
|
|
102
|
-
"voice_category" : {
|
|
103
|
-
"type" : "string",
|
|
104
|
-
"description" : "the category of the voice to use (optional, default: premade)",
|
|
105
|
-
"enum" : [
|
|
106
|
-
"generated", "cloned", "premade", "professional", "famous", "high_quality"
|
|
107
|
-
]
|
|
108
|
-
},
|
|
109
|
-
"model" : {
|
|
110
|
-
"type": "string",
|
|
111
|
-
"description" : "(default: eleven_flash_v2_5)",
|
|
112
|
-
"enum" : [ "eleven_multilingual_v2", "eleven_flash_v2_5", "eleven_flash_v2", "eleven_multilingual_sts_v2", "eleven_english_sts_v2" ]
|
|
113
|
-
},
|
|
114
|
-
"encoding" : {
|
|
115
|
-
"type" : "string",
|
|
116
|
-
"description" : "(default: pcm_44100)",
|
|
117
|
-
"enum" : [ "pcm_44100", "mp3_22050_32" ]
|
|
118
|
-
}
|
|
119
|
-
},
|
|
120
|
-
"required": ["input_text","voice_id","model","encoding", "voice_name", "voice_category"],
|
|
121
|
-
"additionalProperties" : False,
|
|
122
|
-
})
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
async def execute(self, *, context: ToolContext, input_text: str, voice_id: str, voice_name:str, voice_category:str, model: str, encoding: str):
|
|
127
|
-
tts = elevenlabs.TTS(
|
|
128
|
-
model_id=model,
|
|
129
|
-
encoding=encoding,
|
|
130
|
-
voice=elevenlabs.Voice(id=voice_id, name=voice_name, category=voice_category)
|
|
131
|
-
)
|
|
132
|
-
|
|
133
|
-
return await synthesize(tts, input_text)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
class CartesiaTextToSpeech(Tool):
|
|
137
|
-
def __init__(self):
|
|
138
|
-
super().__init__(
|
|
139
|
-
name="cartesia_text_to_speech",
|
|
140
|
-
title="Cartesia text to speech",
|
|
141
|
-
description="generate an audio file, converting text to speech",
|
|
142
|
-
input_schema={
|
|
143
|
-
"type" : "object",
|
|
144
|
-
"properties" : {
|
|
145
|
-
"input_text" : {
|
|
146
|
-
"type" : "string",
|
|
147
|
-
"description" : "the text to convert to speech",
|
|
148
|
-
},
|
|
149
|
-
"voice" : {
|
|
150
|
-
"type" : "string",
|
|
151
|
-
"description" : "the id of a voice to use (default: c2ac25f9-ecc4-4f56-9095-651354df60c0)"
|
|
152
|
-
},
|
|
153
|
-
"model" : {
|
|
154
|
-
"type": "string",
|
|
155
|
-
"description" : "(default: sonic-english)",
|
|
156
|
-
"enum" : [ "sonic", "sonic-preview", "sonic-2024-12-12", "sonic-2024-10-19", "sonic-english", "sonic-multilingual" ]
|
|
157
|
-
},
|
|
158
|
-
"speed" : {
|
|
159
|
-
"type" : "string",
|
|
160
|
-
"description" : "(default: normal)",
|
|
161
|
-
"enum" : ["fastest", "fast", "normal", "slow", "slowest" ]
|
|
162
|
-
},
|
|
163
|
-
"encoding" : {
|
|
164
|
-
"type" : "string",
|
|
165
|
-
"description" : "(default: pcm_s16le)",
|
|
166
|
-
"enum" : [ "pcm_s16le" ]
|
|
167
|
-
},
|
|
168
|
-
"emotion" : {
|
|
169
|
-
"type" : "array",
|
|
170
|
-
"items" : {
|
|
171
|
-
"type": "string",
|
|
172
|
-
"enum" : [
|
|
173
|
-
"anger:lowest",
|
|
174
|
-
"positivity:lowest",
|
|
175
|
-
"surprise:lowest",
|
|
176
|
-
"sadness:lowest",
|
|
177
|
-
"curiosity:lowest"
|
|
178
|
-
"anger:low",
|
|
179
|
-
"positivity:low",
|
|
180
|
-
"surprise:low",
|
|
181
|
-
"sadness:low",
|
|
182
|
-
"curiosity:low"
|
|
183
|
-
"anger:medium",
|
|
184
|
-
"positivity",
|
|
185
|
-
"surprise",
|
|
186
|
-
"sadness",
|
|
187
|
-
"curiosity"
|
|
188
|
-
"anger:high",
|
|
189
|
-
"positivity:high",
|
|
190
|
-
"surprise:high",
|
|
191
|
-
"sadness:high",
|
|
192
|
-
"curiosity:high",
|
|
193
|
-
"anger:highest",
|
|
194
|
-
"positivity:highest",
|
|
195
|
-
"surprise:highest",
|
|
196
|
-
"sadness:highest",
|
|
197
|
-
"curiosity:highest"
|
|
198
|
-
|
|
199
|
-
]
|
|
200
|
-
}
|
|
201
|
-
}
|
|
202
|
-
},
|
|
203
|
-
"required": ["input_text", "voice","speed","emotion","encoding","model"],
|
|
204
|
-
"additionalProperties" : False,
|
|
205
|
-
})
|
|
206
|
-
|
|
207
|
-
async def execute(self, *, context: ToolContext, input_text: str, voice: str, model: str, speed: str, emotion: list, encoding: str):
|
|
208
|
-
tts = cartesia.TTS(
|
|
209
|
-
encoding=encoding,
|
|
210
|
-
model=model,
|
|
211
|
-
voice=voice,
|
|
212
|
-
emotion=emotion,
|
|
213
|
-
speed=speed
|
|
214
|
-
)
|
|
215
|
-
|
|
216
|
-
return await synthesize(tts, input_text)
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
class OpenAITextToSpeech(Tool):
|
|
220
|
-
def __init__(self):
|
|
221
|
-
super().__init__(
|
|
222
|
-
name="openai_text_to_speech",
|
|
223
|
-
title="OpenAI text to speech",
|
|
224
|
-
description="generate an audio file, converting text to speech",
|
|
225
|
-
input_schema={
|
|
226
|
-
"type" : "object",
|
|
227
|
-
"properties" : {
|
|
228
|
-
"input_text" : {
|
|
229
|
-
"type" : "string",
|
|
230
|
-
"description" : "the text to convert to speech",
|
|
231
|
-
},
|
|
232
|
-
"voice" : {
|
|
233
|
-
"type" : "string",
|
|
234
|
-
"description" : "the id of a voice to use (default: alloy)",
|
|
235
|
-
"enum" : [
|
|
236
|
-
"alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"
|
|
237
|
-
]
|
|
238
|
-
},
|
|
239
|
-
"model" : {
|
|
240
|
-
"type": "string",
|
|
241
|
-
"description" : "(default: tts-1)",
|
|
242
|
-
"enum" : [ "tts-1", "tts-1-hd" ]
|
|
243
|
-
},
|
|
244
|
-
"speed" : {
|
|
245
|
-
"type" : "number",
|
|
246
|
-
"description" : "(default: 1.0)",
|
|
247
|
-
},
|
|
248
|
-
},
|
|
249
|
-
"required": ["input_text", "voice", "model", "speed"],
|
|
250
|
-
"additionalProperties" : False,
|
|
251
|
-
})
|
|
252
|
-
|
|
253
|
-
async def execute(self, *, context: ToolContext, input_text: str, voice: str, model: str, speed: float):
|
|
254
|
-
tts = openai.TTS(
|
|
255
|
-
model=model,
|
|
256
|
-
voice=voice,
|
|
257
|
-
speed=speed
|
|
258
|
-
)
|
|
259
|
-
|
|
260
|
-
return await synthesize(tts, input_text)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|