meshagent-livekit 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-livekit might be problematic. Click here for more details.
- meshagent/livekit/__init__.py +3 -1
- meshagent/livekit/agents/transcriber.py +196 -117
- meshagent/livekit/agents/voice.py +112 -115
- meshagent/livekit/livekit_protocol.py +15 -13
- meshagent/livekit/livekit_protocol_test.py +59 -49
- meshagent/livekit/version.py +1 -1
- meshagent_livekit-0.0.38.dist-info/METADATA +37 -0
- meshagent_livekit-0.0.38.dist-info/RECORD +11 -0
- meshagent_livekit-0.0.37.dist-info/METADATA +0 -24
- meshagent_livekit-0.0.37.dist-info/RECORD +0 -11
- {meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/WHEEL +0 -0
- {meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/licenses/LICENSE +0 -0
- {meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/top_level.txt +0 -0
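meshagent/livekit/__init__.py
CHANGED
(the diff body for this file is missing from this extract; the hunks that follow belong to the next file)
meshagent/livekit/agents/transcriber.py
CHANGED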
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import asyncio
|
|
3
3
|
|
|
4
|
-
from meshagent.api.schema_document import Element,Text
|
|
5
|
-
from meshagent.api.room_server_client import RoomClient
|
|
6
|
-
from meshagent.api.websocket_protocol import WebSocketClientProtocol
|
|
7
4
|
|
|
8
|
-
|
|
9
|
-
import asyncio
|
|
10
5
|
import os
|
|
11
|
-
import logging
|
|
12
|
-
import json
|
|
13
6
|
|
|
14
7
|
from livekit import api
|
|
15
8
|
|
|
@@ -17,7 +10,6 @@ from livekit.agents import stt, transcription, utils
|
|
|
17
10
|
from livekit.plugins import openai, silero
|
|
18
11
|
from livekit import rtc
|
|
19
12
|
from livekit.rtc import TranscriptionSegment
|
|
20
|
-
from livekit.agents import utils
|
|
21
13
|
from livekit.agents import stt as speech_to_text
|
|
22
14
|
|
|
23
15
|
from meshagent.api.runtime import RuntimeDocument
|
|
@@ -26,7 +18,7 @@ from typing import Optional
|
|
|
26
18
|
|
|
27
19
|
from meshagent.api.schema import MeshSchema
|
|
28
20
|
|
|
29
|
-
from meshagent.api.schema import
|
|
21
|
+
from meshagent.api.schema import ElementType, ChildProperty, ValueProperty
|
|
30
22
|
|
|
31
23
|
from meshagent.agents.agent import AgentCallContext
|
|
32
24
|
from meshagent.agents import TaskRunner
|
|
@@ -37,151 +29,221 @@ logger = logging.getLogger("transcriber")
|
|
|
37
29
|
transcription_schema = MeshSchema(
|
|
38
30
|
root_tag_name="transcript",
|
|
39
31
|
elements=[
|
|
40
|
-
ElementType(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
32
|
+
ElementType(
|
|
33
|
+
tag_name="transcript",
|
|
34
|
+
description="a transcript",
|
|
35
|
+
properties=[
|
|
36
|
+
ChildProperty(
|
|
37
|
+
name="transcriptions",
|
|
38
|
+
description="the transcript entries",
|
|
39
|
+
child_tag_names=["speech"],
|
|
40
|
+
)
|
|
41
|
+
],
|
|
42
|
+
),
|
|
43
|
+
ElementType(
|
|
44
|
+
tag_name="speech",
|
|
45
|
+
description="transcribed speech",
|
|
46
|
+
properties=[
|
|
47
|
+
ValueProperty(
|
|
48
|
+
name="text", description="the transcribed text", type="string"
|
|
49
|
+
),
|
|
50
|
+
ValueProperty(
|
|
51
|
+
name="startTime",
|
|
52
|
+
description="the time of the start of this speech",
|
|
53
|
+
type="number",
|
|
54
|
+
),
|
|
55
|
+
ValueProperty(
|
|
56
|
+
name="endTime",
|
|
57
|
+
description="the time of th end of this speech",
|
|
58
|
+
type="number",
|
|
59
|
+
),
|
|
60
|
+
ValueProperty(
|
|
61
|
+
name="participantId",
|
|
62
|
+
description="the identity of the participant",
|
|
63
|
+
type="string",
|
|
64
|
+
),
|
|
65
|
+
ValueProperty(
|
|
66
|
+
name="participantName",
|
|
67
|
+
description="the name of the participant",
|
|
68
|
+
type="string",
|
|
69
|
+
),
|
|
70
|
+
],
|
|
71
|
+
),
|
|
72
|
+
],
|
|
51
73
|
)
|
|
52
74
|
|
|
53
|
-
class Transcriber(TaskRunner):
|
|
54
75
|
|
|
55
|
-
|
|
76
|
+
class Transcriber(TaskRunner):
|
|
77
|
+
def __init__(
|
|
78
|
+
self,
|
|
79
|
+
*,
|
|
80
|
+
livekit_url: Optional[str] = None,
|
|
81
|
+
livekit_api_key: Optional[str] = None,
|
|
82
|
+
livekit_api_secret: Optional[str] = None,
|
|
83
|
+
livekit_identity: Optional[str] = None,
|
|
84
|
+
):
|
|
56
85
|
super().__init__(
|
|
57
86
|
name="livekit.transcriber",
|
|
58
87
|
title="transcriber",
|
|
59
88
|
description="connects to a livekit room and transcribes the conversation",
|
|
60
89
|
input_schema={
|
|
61
|
-
"type"
|
|
62
|
-
"additionalProperties"
|
|
63
|
-
"required"
|
|
64
|
-
"properties"
|
|
65
|
-
"room_name" :
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
"path" : {
|
|
69
|
-
"type" : "string"
|
|
70
|
-
}
|
|
71
|
-
}
|
|
90
|
+
"type": "object",
|
|
91
|
+
"additionalProperties": False,
|
|
92
|
+
"required": ["room_name", "path"],
|
|
93
|
+
"properties": {
|
|
94
|
+
"room_name": {"type": "string"},
|
|
95
|
+
"path": {"type": "string"},
|
|
96
|
+
},
|
|
72
97
|
},
|
|
73
98
|
output_schema={
|
|
74
|
-
"type"
|
|
75
|
-
"additionalProperties"
|
|
76
|
-
"required"
|
|
77
|
-
"properties"
|
|
78
|
-
|
|
79
|
-
}
|
|
99
|
+
"type": "object",
|
|
100
|
+
"additionalProperties": False,
|
|
101
|
+
"required": [],
|
|
102
|
+
"properties": {},
|
|
103
|
+
},
|
|
80
104
|
)
|
|
81
105
|
self._livekit_url = livekit_url
|
|
82
106
|
self._livekit_api_key = livekit_api_key
|
|
83
107
|
self._livekit_api_secret = livekit_api_secret
|
|
84
108
|
self._livekit_identity = livekit_identity
|
|
85
109
|
|
|
86
|
-
async def _transcribe_participant(
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
110
|
+
async def _transcribe_participant(
|
|
111
|
+
self,
|
|
112
|
+
doc: RuntimeDocument,
|
|
113
|
+
room: rtc.Room,
|
|
114
|
+
participant: rtc.RemoteParticipant,
|
|
115
|
+
stt_stream: stt.SpeechStream,
|
|
116
|
+
stt_forwarder: transcription.STTSegmentsForwarder,
|
|
117
|
+
):
|
|
118
|
+
logger.info("transcribing participant %s", participant.sid)
|
|
119
|
+
"""Forward the transcription to the client and log the transcript in the console"""
|
|
120
|
+
async for ev in stt_stream:
|
|
121
|
+
logger.info("event from participant %s %s", participant.sid, ev)
|
|
122
|
+
|
|
123
|
+
if ev.type == stt.SpeechEventType.FINAL_TRANSCRIPT:
|
|
124
|
+
logger.info("transcript: %s", ev.alternatives[0].text)
|
|
125
|
+
if len(ev.alternatives) > 0:
|
|
126
|
+
alt = ev.alternatives[0]
|
|
127
|
+
doc.root.append_child(
|
|
128
|
+
tag_name="speech",
|
|
129
|
+
attributes={
|
|
130
|
+
"text": alt.text,
|
|
131
|
+
"startTime": alt.start_time,
|
|
132
|
+
"endTime": alt.end_time,
|
|
133
|
+
"participantId": participant.identity,
|
|
134
|
+
"participantName": participant.name,
|
|
135
|
+
},
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
logger.info("done forwarding %s", participant.sid)
|
|
101
139
|
|
|
102
140
|
def should_transcribe(self, p: rtc.Participant) -> bool:
|
|
103
141
|
# don't transcribe other agents
|
|
104
142
|
# todo: maybe have a better way to detect
|
|
105
143
|
return ".agent" not in p.identity
|
|
106
|
-
|
|
144
|
+
|
|
107
145
|
async def _wait_for_disconnect(self, room: rtc.Room):
|
|
108
146
|
disconnected = asyncio.Future()
|
|
147
|
+
|
|
109
148
|
def on_disconnected(_):
|
|
110
149
|
disconnected.set_result(True)
|
|
150
|
+
|
|
111
151
|
room.on("disconnected", on_disconnected)
|
|
112
152
|
|
|
113
153
|
logger.info("waiting for disconnection")
|
|
114
|
-
await disconnected
|
|
115
|
-
|
|
154
|
+
await disconnected
|
|
116
155
|
|
|
117
156
|
async def ask(self, *, context: AgentCallContext, arguments: dict):
|
|
118
157
|
logger.info("Transcriber connecting to %s", arguments)
|
|
119
158
|
output_path = arguments["path"]
|
|
120
159
|
room_name = arguments["room_name"]
|
|
121
160
|
|
|
122
|
-
client = context.room
|
|
161
|
+
client = context.room
|
|
123
162
|
doc = await client.sync.open(path=output_path)
|
|
124
163
|
try:
|
|
164
|
+
vad = silero.VAD.load()
|
|
165
|
+
utils.http_context._new_session_ctx()
|
|
125
166
|
|
|
126
|
-
vad = silero.VAD.load()
|
|
127
|
-
utils.http_context._new_session_ctx()
|
|
128
|
-
|
|
129
167
|
pending_tasks = list()
|
|
130
|
-
participantNames = dict[str,str]()
|
|
168
|
+
participantNames = dict[str, str]()
|
|
131
169
|
|
|
132
170
|
sst_provider = openai.STT()
|
|
133
|
-
#sst_provider = fal.WizperSTT()
|
|
171
|
+
# sst_provider = fal.WizperSTT()
|
|
134
172
|
|
|
135
173
|
room_options = rtc.RoomOptions(auto_subscribe=False)
|
|
136
174
|
|
|
137
175
|
room = rtc.Room()
|
|
138
176
|
|
|
139
|
-
url =
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
.
|
|
146
|
-
.
|
|
147
|
-
.
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
177
|
+
url = (
|
|
178
|
+
self._livekit_url
|
|
179
|
+
if self._livekit_url is not None
|
|
180
|
+
else os.getenv("LIVEKIT_URL")
|
|
181
|
+
)
|
|
182
|
+
api_key = (
|
|
183
|
+
self._livekit_api_key
|
|
184
|
+
if self._livekit_api_key is not None
|
|
185
|
+
else os.getenv("LIVEKIT_API_KEY")
|
|
186
|
+
)
|
|
187
|
+
api_secret = (
|
|
188
|
+
self._livekit_api_secret
|
|
189
|
+
if self._livekit_api_secret is not None
|
|
190
|
+
else os.getenv("LIVEKIT_API_SECRET")
|
|
191
|
+
)
|
|
192
|
+
identity = (
|
|
193
|
+
self._livekit_identity
|
|
194
|
+
if self._livekit_identity is not None
|
|
195
|
+
else os.getenv("AGENT_IDENTITY")
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
token = (
|
|
199
|
+
api.AccessToken(api_key=api_key, api_secret=api_secret)
|
|
200
|
+
.with_identity(identity)
|
|
201
|
+
.with_name("Agent")
|
|
202
|
+
.with_kind("agent")
|
|
203
|
+
.with_grants(
|
|
204
|
+
api.VideoGrants(
|
|
205
|
+
can_update_own_metadata=True,
|
|
206
|
+
room_join=True,
|
|
207
|
+
room=room_name,
|
|
208
|
+
agent=True,
|
|
209
|
+
)
|
|
210
|
+
)
|
|
211
|
+
)
|
|
212
|
+
|
|
155
213
|
jwt = token.to_jwt()
|
|
156
|
-
|
|
214
|
+
|
|
157
215
|
await room.connect(url=url, token=jwt, options=room_options)
|
|
158
|
-
|
|
159
216
|
|
|
160
|
-
logger.info(
|
|
161
|
-
|
|
162
|
-
room_name
|
|
163
|
-
)
|
|
164
|
-
|
|
217
|
+
logger.info("connected to room: %s", room_name)
|
|
218
|
+
|
|
165
219
|
audio_streams = list[rtc.AudioStream]()
|
|
166
220
|
|
|
167
|
-
async def transcribe_track(
|
|
221
|
+
async def transcribe_track(
|
|
222
|
+
participant: rtc.RemoteParticipant, track: rtc.Track
|
|
223
|
+
):
|
|
168
224
|
audio_stream = rtc.AudioStream(track)
|
|
169
225
|
stt_forwarder = transcription.STTSegmentsForwarder(
|
|
170
226
|
room=room, participant=participant, track=track
|
|
171
227
|
)
|
|
172
228
|
|
|
173
229
|
audio_streams.append(audio_stream)
|
|
174
|
-
|
|
230
|
+
|
|
175
231
|
stt = sst_provider
|
|
176
|
-
if not sst_provider.capabilities.streaming:
|
|
232
|
+
if not sst_provider.capabilities.streaming:
|
|
177
233
|
stt = speech_to_text.StreamAdapter(
|
|
178
234
|
stt=stt,
|
|
179
235
|
vad=vad,
|
|
180
236
|
)
|
|
181
|
-
|
|
237
|
+
|
|
182
238
|
stt_stream = stt.stream()
|
|
183
239
|
|
|
184
|
-
pending_tasks.append(
|
|
240
|
+
pending_tasks.append(
|
|
241
|
+
asyncio.create_task(
|
|
242
|
+
self._transcribe_participant(
|
|
243
|
+
doc, room, participant, stt_stream, stt_forwarder
|
|
244
|
+
)
|
|
245
|
+
)
|
|
246
|
+
)
|
|
185
247
|
|
|
186
248
|
async for ev in audio_stream:
|
|
187
249
|
stt_stream.push_frame(ev.frame)
|
|
@@ -189,49 +251,67 @@ class Transcriber(TaskRunner):
|
|
|
189
251
|
def subscribe_if_needed(pub: rtc.RemoteTrackPublication):
|
|
190
252
|
if pub.kind == rtc.TrackKind.KIND_AUDIO:
|
|
191
253
|
pub.set_subscribed(True)
|
|
192
|
-
|
|
254
|
+
|
|
193
255
|
for p in room.remote_participants.values():
|
|
194
256
|
participantNames[p.identity] = p.name
|
|
195
257
|
if self.should_transcribe(p):
|
|
196
258
|
for pub in p.track_publications.values():
|
|
197
259
|
subscribe_if_needed(pub)
|
|
198
|
-
|
|
260
|
+
|
|
199
261
|
first_parts = dict[str, rtc.Participant]()
|
|
200
|
-
|
|
201
|
-
def on_transcript_event(
|
|
262
|
+
|
|
263
|
+
def on_transcript_event(
|
|
264
|
+
segments: list[TranscriptionSegment],
|
|
265
|
+
part: rtc.Participant | None,
|
|
266
|
+
pub: rtc.TrackPublication | None = None,
|
|
267
|
+
) -> None:
|
|
202
268
|
nonlocal room
|
|
203
269
|
logger.info("Got transcription segment %s %s %s", segments, part, pub)
|
|
204
270
|
for segment in segments:
|
|
205
|
-
if segment.id not in first_parts and part
|
|
271
|
+
if segment.id not in first_parts and part is not None:
|
|
206
272
|
first_parts[segment.id] = part
|
|
207
273
|
|
|
208
274
|
if segment.final:
|
|
209
|
-
if part
|
|
275
|
+
if part is None and segment.id in first_parts:
|
|
210
276
|
part = first_parts[segment.id]
|
|
211
277
|
first_parts.pop(segment.id)
|
|
212
|
-
|
|
213
|
-
if part
|
|
214
|
-
doc.root.append_child(
|
|
278
|
+
|
|
279
|
+
if part is not None:
|
|
280
|
+
doc.root.append_child(
|
|
281
|
+
tag_name="speech",
|
|
282
|
+
attributes={
|
|
283
|
+
"text": segment.text,
|
|
284
|
+
"startTime": segment.start_time,
|
|
285
|
+
"endTime": segment.end_time,
|
|
286
|
+
"participantId": part.identity,
|
|
287
|
+
"participantName": part.name,
|
|
288
|
+
},
|
|
289
|
+
)
|
|
215
290
|
else:
|
|
216
|
-
logger.warning(
|
|
291
|
+
logger.warning(
|
|
292
|
+
"transcription was missing participant information"
|
|
293
|
+
)
|
|
217
294
|
|
|
218
295
|
def on_participant_connected(p: rtc.RemoteParticipant):
|
|
219
296
|
participantNames[p.identity] = p.name
|
|
220
297
|
|
|
221
|
-
def on_track_published(
|
|
298
|
+
def on_track_published(
|
|
299
|
+
pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant
|
|
300
|
+
):
|
|
222
301
|
if self.should_transcribe(p):
|
|
223
302
|
subscribe_if_needed(pub)
|
|
224
303
|
|
|
225
304
|
subscriptions = dict()
|
|
226
|
-
|
|
227
|
-
def on_track_unpublished(
|
|
305
|
+
|
|
306
|
+
def on_track_unpublished(
|
|
307
|
+
pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant
|
|
308
|
+
):
|
|
228
309
|
if pub in subscriptions:
|
|
229
310
|
logger.info("track unpublished, stopping transcription")
|
|
230
311
|
# todo: maybe could be more graceful
|
|
231
312
|
subscriptions[pub].cancel()
|
|
232
313
|
subscriptions.pop(pub)
|
|
233
|
-
|
|
234
|
-
|
|
314
|
+
|
|
235
315
|
def on_track_subscribed(
|
|
236
316
|
track: rtc.Track,
|
|
237
317
|
publication: rtc.TrackPublication,
|
|
@@ -239,22 +319,23 @@ class Transcriber(TaskRunner):
|
|
|
239
319
|
):
|
|
240
320
|
if track.kind == rtc.TrackKind.KIND_AUDIO:
|
|
241
321
|
logger.info("transcribing track %s", track.sid)
|
|
242
|
-
track_task = asyncio.create_task(
|
|
322
|
+
track_task = asyncio.create_task(
|
|
323
|
+
transcribe_track(participant, track)
|
|
324
|
+
)
|
|
325
|
+
|
|
243
326
|
def on_transcription_done(t):
|
|
244
327
|
try:
|
|
245
328
|
t.result()
|
|
246
329
|
except Exception as e:
|
|
247
330
|
logger.error("Transcription failed", exc_info=e)
|
|
248
331
|
|
|
249
|
-
|
|
250
332
|
track_task.add_done_callback(on_transcription_done)
|
|
251
333
|
pending_tasks.append(track_task)
|
|
252
334
|
subscriptions[publication] = track_task
|
|
253
|
-
|
|
254
335
|
|
|
255
336
|
for p in room.remote_participants.values():
|
|
256
337
|
on_participant_connected(p)
|
|
257
|
-
|
|
338
|
+
|
|
258
339
|
room.on("participant_connected", on_participant_connected)
|
|
259
340
|
|
|
260
341
|
room.on("track_published", on_track_published)
|
|
@@ -263,16 +344,15 @@ class Transcriber(TaskRunner):
|
|
|
263
344
|
room.on("transcription_received", on_transcript_event)
|
|
264
345
|
|
|
265
346
|
await self._wait_for_disconnect(room)
|
|
266
|
-
|
|
347
|
+
|
|
267
348
|
logger.info("waited for termination")
|
|
268
349
|
await room.disconnect()
|
|
269
|
-
|
|
350
|
+
|
|
270
351
|
logger.info("closing audio streams")
|
|
271
|
-
|
|
352
|
+
|
|
272
353
|
for stream in audio_streams:
|
|
273
354
|
await stream.aclose()
|
|
274
355
|
|
|
275
|
-
|
|
276
356
|
logger.info("waiting for pending tasks")
|
|
277
357
|
gather_future = asyncio.gather(*pending_tasks)
|
|
278
358
|
|
|
@@ -280,7 +360,7 @@ class Transcriber(TaskRunner):
|
|
|
280
360
|
try:
|
|
281
361
|
await gather_future
|
|
282
362
|
except Exception as e:
|
|
283
|
-
if isinstance(e, asyncio.CancelledError)
|
|
363
|
+
if not isinstance(e, asyncio.CancelledError):
|
|
284
364
|
logger.warning("Did not shut down cleanly", exc_info=e)
|
|
285
365
|
pass
|
|
286
366
|
|
|
@@ -288,11 +368,10 @@ class Transcriber(TaskRunner):
|
|
|
288
368
|
except Exception as e:
|
|
289
369
|
logger.info("Transcription failed", exc_info=e)
|
|
290
370
|
finally:
|
|
291
|
-
|
|
292
371
|
await utils.http_context._close_http_ctx()
|
|
293
372
|
logger.info("Transcription done")
|
|
294
373
|
|
|
295
374
|
await asyncio.sleep(5)
|
|
296
375
|
await client.sync.close(path=output_path)
|
|
297
|
-
|
|
376
|
+
|
|
298
377
|
return {}
|
|
meshagent/livekit/agents/voice.py
CHANGED
|
@@ -2,7 +2,15 @@ import logging
|
|
|
2
2
|
import asyncio
|
|
3
3
|
from asyncio import CancelledError
|
|
4
4
|
|
|
5
|
-
from meshagent.api import
|
|
5
|
+
from meshagent.api import (
|
|
6
|
+
RoomMessage,
|
|
7
|
+
ErrorResponse,
|
|
8
|
+
Requirement,
|
|
9
|
+
Participant,
|
|
10
|
+
JsonResponse,
|
|
11
|
+
EmptyResponse,
|
|
12
|
+
TextResponse,
|
|
13
|
+
)
|
|
6
14
|
from meshagent.api.room_server_client import RoomClient
|
|
7
15
|
|
|
8
16
|
from meshagent.agents import ToolResponseAdapter
|
|
@@ -14,34 +22,23 @@ from openai import AsyncOpenAI
|
|
|
14
22
|
from meshagent.agents import AgentChatContext
|
|
15
23
|
from livekit.agents import BackgroundAudioPlayer, AudioConfig, BuiltinAudioClip
|
|
16
24
|
|
|
17
|
-
from typing import Annotated
|
|
18
25
|
from livekit.plugins import openai, silero
|
|
19
|
-
#from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
|
20
|
-
import uuid
|
|
21
|
-
import asyncio
|
|
22
|
-
import logging
|
|
26
|
+
# from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
|
23
27
|
|
|
24
|
-
import os
|
|
25
28
|
|
|
26
|
-
import json
|
|
29
|
+
import json
|
|
27
30
|
|
|
28
31
|
from typing import Any
|
|
29
32
|
|
|
30
|
-
from livekit.plugins import openai
|
|
31
33
|
|
|
32
|
-
from livekit.plugins import openai, silero
|
|
33
34
|
from livekit import rtc
|
|
34
|
-
from livekit.agents import
|
|
35
|
+
from livekit.agents import RunContext
|
|
35
36
|
|
|
36
37
|
from typing import Optional
|
|
37
38
|
|
|
38
|
-
from copy import deepcopy
|
|
39
|
-
|
|
40
|
-
from meshagent.api.schema_util import merge, prompt_schema
|
|
41
39
|
|
|
42
40
|
from meshagent.agents import SingleRoomAgent
|
|
43
41
|
|
|
44
|
-
from livekit.plugins.turn_detector.multilingual import MultilingualModel
|
|
45
42
|
|
|
46
43
|
import re
|
|
47
44
|
|
|
@@ -52,7 +49,7 @@ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> st
|
|
|
52
49
|
"""
|
|
53
50
|
Replaces every character in `text` that does not match the given
|
|
54
51
|
`allowed_chars` regex set with `replacement`.
|
|
55
|
-
|
|
52
|
+
|
|
56
53
|
Parameters:
|
|
57
54
|
-----------
|
|
58
55
|
text : str
|
|
@@ -62,7 +59,7 @@ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> st
|
|
|
62
59
|
For example, "a-zA-Z0-9" will keep only letters and digits.
|
|
63
60
|
replacement : str
|
|
64
61
|
The string to replace non-matching characters with.
|
|
65
|
-
|
|
62
|
+
|
|
66
63
|
Returns:
|
|
67
64
|
--------
|
|
68
65
|
str
|
|
@@ -72,35 +69,40 @@ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> st
|
|
|
72
69
|
pattern = rf"[^{allowed_chars}]"
|
|
73
70
|
return re.sub(pattern, replacement, text)
|
|
74
71
|
|
|
72
|
+
|
|
75
73
|
def safe_tool_name(name: str):
|
|
76
74
|
return _replace_non_matching(name, "a-zA-Z0-9_-", "_")
|
|
77
75
|
|
|
76
|
+
|
|
78
77
|
class VoiceConnection:
|
|
79
78
|
def __init__(self, *, room: RoomClient, breakout_room: str):
|
|
80
79
|
self.room = room
|
|
81
80
|
self.breakout_room = breakout_room
|
|
82
|
-
|
|
83
|
-
async def __aenter__(self):
|
|
84
81
|
|
|
82
|
+
async def __aenter__(self):
|
|
85
83
|
client = self.room
|
|
86
|
-
|
|
84
|
+
|
|
87
85
|
room_options = rtc.RoomOptions(auto_subscribe=True)
|
|
88
86
|
|
|
89
87
|
room = rtc.Room()
|
|
90
88
|
|
|
91
89
|
self.livekit_room = room
|
|
92
90
|
|
|
93
|
-
connection_info = await client.livekit.get_connection_info(
|
|
94
|
-
|
|
95
|
-
|
|
91
|
+
connection_info = await client.livekit.get_connection_info(
|
|
92
|
+
breakout_room=self.breakout_room
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
await room.connect(
|
|
96
|
+
url=connection_info.url, token=connection_info.token, options=room_options
|
|
97
|
+
)
|
|
96
98
|
|
|
97
99
|
return self
|
|
98
|
-
|
|
100
|
+
|
|
99
101
|
async def __aexit__(self, exc_type, exc, tb):
|
|
100
102
|
await self.livekit_room.disconnect()
|
|
101
103
|
|
|
102
|
-
class VoiceBot(SingleRoomAgent):
|
|
103
104
|
|
|
105
|
+
class VoiceBot(SingleRoomAgent):
|
|
104
106
|
def __init__(
|
|
105
107
|
self,
|
|
106
108
|
name: str,
|
|
@@ -112,20 +114,20 @@ class VoiceBot(SingleRoomAgent):
|
|
|
112
114
|
auto_greet_prompt: Optional[str] = None,
|
|
113
115
|
tool_adapter: ToolResponseAdapter = None,
|
|
114
116
|
toolkits: list[Toolkit] = None,
|
|
115
|
-
requires: list[Requirement] = None
|
|
117
|
+
requires: list[Requirement] = None,
|
|
116
118
|
):
|
|
117
|
-
if toolkits
|
|
119
|
+
if toolkits is None:
|
|
118
120
|
toolkits = []
|
|
119
121
|
|
|
120
122
|
self.toolkits = toolkits
|
|
121
|
-
|
|
122
|
-
if rules
|
|
123
|
-
rules = [
|
|
124
|
-
|
|
123
|
+
|
|
124
|
+
if rules is None:
|
|
125
|
+
rules = ["You are a helpful assistant communicating through voice."]
|
|
126
|
+
|
|
125
127
|
self.tool_adapter = tool_adapter
|
|
126
128
|
self.auto_greet_message = auto_greet_message
|
|
127
129
|
self.auto_greet_prompt = auto_greet_prompt
|
|
128
|
-
|
|
130
|
+
|
|
129
131
|
self.rules = rules
|
|
130
132
|
|
|
131
133
|
super().__init__(
|
|
@@ -133,7 +135,7 @@ class VoiceBot(SingleRoomAgent):
|
|
|
133
135
|
description=description,
|
|
134
136
|
title=title,
|
|
135
137
|
labels=labels,
|
|
136
|
-
requires=requires
|
|
138
|
+
requires=requires,
|
|
137
139
|
)
|
|
138
140
|
|
|
139
141
|
async def start(self, *, room):
|
|
@@ -141,66 +143,75 @@ class VoiceBot(SingleRoomAgent):
|
|
|
141
143
|
await room.local_participant.set_attribute("supports_voice", True)
|
|
142
144
|
await room.messaging.enable()
|
|
143
145
|
room.messaging.on("message", self.on_message)
|
|
144
|
-
|
|
146
|
+
|
|
145
147
|
def on_message(self, message: RoomMessage):
|
|
146
148
|
if message.type == "voice_call":
|
|
147
149
|
breakout_room = message.message["breakout_room"]
|
|
148
|
-
|
|
150
|
+
|
|
149
151
|
logger.info(f"joining breakout room {breakout_room}")
|
|
150
|
-
|
|
152
|
+
|
|
151
153
|
def on_done(task: asyncio.Task):
|
|
152
154
|
try:
|
|
153
155
|
task.result()
|
|
154
|
-
except CancelledError
|
|
155
|
-
|
|
156
|
+
except CancelledError:
|
|
157
|
+
pass
|
|
156
158
|
except Exception as e:
|
|
157
159
|
logger.error(f"{e}", exc_info=e)
|
|
158
160
|
|
|
159
161
|
for participant in self.room.messaging.remote_participants:
|
|
160
|
-
|
|
161
162
|
if participant.id == message.from_participant_id:
|
|
162
|
-
|
|
163
|
-
|
|
163
|
+
task = asyncio.create_task(
|
|
164
|
+
self.run_voice_agent(
|
|
165
|
+
participant=participant, breakout_room=breakout_room
|
|
166
|
+
)
|
|
167
|
+
)
|
|
164
168
|
task.add_done_callback(on_done)
|
|
165
169
|
return
|
|
166
|
-
|
|
170
|
+
|
|
167
171
|
logger.error(f"unable to find participant {message.from_participant_id}")
|
|
168
|
-
|
|
169
|
-
|
|
172
|
+
|
|
170
173
|
async def _wait_for_disconnect(self, room: rtc.Room):
|
|
171
174
|
disconnected = asyncio.Future()
|
|
175
|
+
|
|
172
176
|
def on_disconnected(_):
|
|
173
177
|
disconnected.set_result(True)
|
|
178
|
+
|
|
174
179
|
room.on("disconnected", on_disconnected)
|
|
175
180
|
|
|
176
181
|
logger.info("waiting for disconnection")
|
|
177
182
|
await disconnected
|
|
178
183
|
|
|
179
184
|
async def make_function_tools(self, *, context: ToolContext):
|
|
180
|
-
|
|
181
|
-
toolkits = [
|
|
182
|
-
*await self.get_required_toolkits(context=context),
|
|
183
|
-
*self.toolkits
|
|
184
|
-
]
|
|
185
|
+
toolkits = [*await self.get_required_toolkits(context=context), *self.toolkits]
|
|
185
186
|
|
|
186
187
|
tools = []
|
|
187
188
|
|
|
188
189
|
for toolkit in toolkits:
|
|
189
|
-
|
|
190
190
|
for tool in toolkit.tools:
|
|
191
|
-
|
|
192
|
-
|
|
191
|
+
tools.append(
|
|
192
|
+
self._make_function_tool(
|
|
193
|
+
toolkits,
|
|
194
|
+
context,
|
|
195
|
+
tool.name,
|
|
196
|
+
tool.description,
|
|
197
|
+
tool.input_schema,
|
|
198
|
+
)
|
|
199
|
+
)
|
|
193
200
|
|
|
194
201
|
return tools
|
|
195
202
|
|
|
196
203
|
def _make_function_tool(
|
|
197
|
-
self,
|
|
204
|
+
self,
|
|
205
|
+
toolkits: list[Toolkit],
|
|
206
|
+
context: ToolContext,
|
|
207
|
+
name: str,
|
|
208
|
+
description: str | None,
|
|
209
|
+
input_schema: dict,
|
|
198
210
|
) -> RawFunctionTool:
|
|
199
|
-
|
|
200
211
|
name = safe_tool_name(name)
|
|
212
|
+
|
|
201
213
|
async def _tool_called(raw_arguments: dict) -> Any:
|
|
202
214
|
try:
|
|
203
|
-
|
|
204
215
|
tool = None
|
|
205
216
|
for toolkit in toolkits:
|
|
206
217
|
for t in toolkit.tools:
|
|
@@ -208,9 +219,7 @@ class VoiceBot(SingleRoomAgent):
|
|
|
208
219
|
tool = t
|
|
209
220
|
|
|
210
221
|
if tool is None:
|
|
211
|
-
raise ToolError(
|
|
212
|
-
f"Could not find tool {name}"
|
|
213
|
-
)
|
|
222
|
+
raise ToolError(f"Could not find tool {name}")
|
|
214
223
|
|
|
215
224
|
try:
|
|
216
225
|
logger.info(f"executing tool {name}: {raw_arguments}")
|
|
@@ -218,32 +227,31 @@ class VoiceBot(SingleRoomAgent):
|
|
|
218
227
|
except Exception as e:
|
|
219
228
|
logger.error(f"failed to call tool {tool.name}: {e}")
|
|
220
229
|
return ToolError("f{e}")
|
|
221
|
-
if self.tool_adapter
|
|
222
|
-
|
|
230
|
+
if self.tool_adapter is None:
|
|
223
231
|
if isinstance(tool_result, ErrorResponse):
|
|
224
232
|
raise ToolError(tool_result.text)
|
|
225
233
|
|
|
226
234
|
if isinstance(tool_result, JsonResponse):
|
|
227
235
|
return json.dumps(tool_result.json)
|
|
228
|
-
|
|
236
|
+
|
|
229
237
|
if isinstance(tool_result, TextResponse):
|
|
230
238
|
return tool_result.text
|
|
231
|
-
|
|
239
|
+
|
|
232
240
|
if isinstance(tool_result, EmptyResponse):
|
|
233
241
|
return "success"
|
|
234
|
-
|
|
235
|
-
if tool_result
|
|
242
|
+
|
|
243
|
+
if tool_result is None:
|
|
236
244
|
return "success"
|
|
237
|
-
|
|
238
245
|
|
|
239
246
|
raise ToolError(
|
|
240
247
|
f"Tool '{name}' returned an unexpected result {type(tool_result)}, attach a tool response adapter"
|
|
241
248
|
)
|
|
242
249
|
|
|
243
250
|
else:
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
251
|
+
text = await self.tool_adapter.to_plain_text(
|
|
252
|
+
room=context.room, response=tool_result
|
|
253
|
+
)
|
|
254
|
+
if text is None:
|
|
247
255
|
text = "success"
|
|
248
256
|
return text
|
|
249
257
|
|
|
@@ -251,14 +259,17 @@ class VoiceBot(SingleRoomAgent):
|
|
|
251
259
|
logger.error("unable to call tool", exc_info=e)
|
|
252
260
|
raise
|
|
253
261
|
|
|
254
|
-
|
|
255
262
|
return function_tool(
|
|
256
263
|
_tool_called,
|
|
257
|
-
raw_schema={
|
|
264
|
+
raw_schema={
|
|
265
|
+
"name": name,
|
|
266
|
+
"description": description,
|
|
267
|
+
"strict": True,
|
|
268
|
+
"parameters": input_schema,
|
|
269
|
+
},
|
|
258
270
|
)
|
|
259
271
|
|
|
260
272
|
async def create_agent(self, *, context: ToolContext, session: AgentSession):
|
|
261
|
-
|
|
262
273
|
@function_tool
|
|
263
274
|
async def say(context: RunContext, text: str):
|
|
264
275
|
"says something out loud to the user"
|
|
@@ -266,8 +277,8 @@ class VoiceBot(SingleRoomAgent):
|
|
|
266
277
|
session.say(text)
|
|
267
278
|
return "success"
|
|
268
279
|
|
|
269
|
-
ctx=ChatContext()
|
|
270
|
-
|
|
280
|
+
ctx = ChatContext()
|
|
281
|
+
|
|
271
282
|
initial_context = await self.init_chat_context()
|
|
272
283
|
for message in initial_context.messages:
|
|
273
284
|
ctx.add_message(role=message["role"], content=message["content"])
|
|
@@ -276,10 +287,7 @@ class VoiceBot(SingleRoomAgent):
|
|
|
276
287
|
chat_ctx=ctx,
|
|
277
288
|
instructions="\n".join(self.rules),
|
|
278
289
|
allow_interruptions=True,
|
|
279
|
-
tools=[
|
|
280
|
-
*await self.make_function_tools(context=context),
|
|
281
|
-
say
|
|
282
|
-
]
|
|
290
|
+
tools=[*await self.make_function_tools(context=context), say],
|
|
283
291
|
)
|
|
284
292
|
|
|
285
293
|
# agent = Agent(
|
|
@@ -291,88 +299,77 @@ class VoiceBot(SingleRoomAgent):
|
|
|
291
299
|
# tts=openai.TTS(),
|
|
292
300
|
# vad=silero.VAD.load(),
|
|
293
301
|
# allow_interruptions=True
|
|
294
|
-
#)
|
|
302
|
+
# )
|
|
295
303
|
|
|
296
304
|
async def init_chat_context(self) -> AgentChatContext:
|
|
297
|
-
return AgentChatContext()
|
|
298
|
-
|
|
305
|
+
return AgentChatContext()
|
|
306
|
+
|
|
299
307
|
def create_session(self, *, context: ToolContext) -> AgentSession:
|
|
308
|
+
token: str = context.room.protocol.token
|
|
309
|
+
url: str = context.room.room_url
|
|
300
310
|
|
|
301
|
-
token : str = context.room.protocol.token
|
|
302
|
-
url : str = context.room.room_url
|
|
303
|
-
|
|
304
311
|
room_proxy_url = f"{url}/v1"
|
|
305
|
-
|
|
312
|
+
|
|
306
313
|
oaiclient = AsyncOpenAI(
|
|
307
314
|
api_key=token,
|
|
308
315
|
base_url=room_proxy_url,
|
|
309
|
-
default_headers={
|
|
310
|
-
"Meshagent-Session" : context.room.session_id
|
|
311
|
-
}
|
|
316
|
+
default_headers={"Meshagent-Session": context.room.session_id},
|
|
312
317
|
)
|
|
313
318
|
|
|
314
319
|
session = AgentSession(
|
|
315
320
|
max_tool_steps=50,
|
|
316
321
|
allow_interruptions=True,
|
|
317
322
|
vad=silero.VAD.load(),
|
|
318
|
-
stt=openai.STT(
|
|
319
|
-
|
|
320
|
-
),
|
|
321
|
-
|
|
322
|
-
client=oaiclient,
|
|
323
|
-
voice="echo"
|
|
324
|
-
),
|
|
325
|
-
llm=openai.LLM(
|
|
326
|
-
client=oaiclient
|
|
327
|
-
),
|
|
328
|
-
#turn_detection=MultilingualModel(),
|
|
329
|
-
|
|
323
|
+
stt=openai.STT(client=oaiclient),
|
|
324
|
+
tts=openai.TTS(client=oaiclient, voice="echo"),
|
|
325
|
+
llm=openai.LLM(client=oaiclient),
|
|
326
|
+
# turn_detection=MultilingualModel(),
|
|
330
327
|
)
|
|
331
328
|
return session
|
|
332
|
-
|
|
333
|
-
|
|
329
|
+
|
|
334
330
|
async def run_voice_agent(self, *, participant: Participant, breakout_room: str):
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
331
|
+
async with VoiceConnection(
|
|
332
|
+
room=self.room, breakout_room=breakout_room
|
|
333
|
+
) as connection:
|
|
338
334
|
logger.info("starting voice agent")
|
|
339
335
|
|
|
340
336
|
context = ToolContext(
|
|
341
337
|
room=self.room,
|
|
342
338
|
caller=self.room.local_participant,
|
|
343
|
-
on_behalf_of=participant
|
|
339
|
+
on_behalf_of=participant,
|
|
344
340
|
)
|
|
345
341
|
|
|
346
342
|
session = self.create_session(context=context)
|
|
347
343
|
|
|
348
344
|
agent = await self.create_agent(context=context, session=session)
|
|
349
|
-
|
|
345
|
+
|
|
350
346
|
background_audio = BackgroundAudioPlayer(
|
|
351
347
|
thinking_sound=[
|
|
352
|
-
#AudioConfig(
|
|
348
|
+
# AudioConfig(
|
|
353
349
|
# os.path.dirname(os.path.abspath(__file__)) +"/sfx/thinking.mp3", volume=0.2),
|
|
354
350
|
AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING, volume=0.3),
|
|
355
351
|
AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING2, volume=0.4),
|
|
356
352
|
],
|
|
357
353
|
)
|
|
358
|
-
await background_audio.start(
|
|
354
|
+
await background_audio.start(
|
|
355
|
+
room=connection.livekit_room, agent_session=session
|
|
356
|
+
)
|
|
359
357
|
|
|
360
358
|
await session.start(agent=agent, room=connection.livekit_room)
|
|
361
359
|
|
|
362
|
-
if self.auto_greet_prompt
|
|
360
|
+
if self.auto_greet_prompt is not None:
|
|
363
361
|
session.generate_reply(user_input=self.auto_greet_prompt)
|
|
364
362
|
|
|
365
|
-
if self.auto_greet_message
|
|
363
|
+
if self.auto_greet_message is not None:
|
|
366
364
|
session.say(self.auto_greet_message)
|
|
367
|
-
|
|
365
|
+
|
|
368
366
|
logger.info("started voice agent")
|
|
369
367
|
await self._wait_for_disconnect(room=connection.livekit_room)
|
|
370
|
-
|
|
371
|
-
|
|
372
368
|
|
|
373
|
-
|
|
374
369
|
|
|
375
370
|
class Voicebot(VoiceBot):
|
|
376
371
|
def __init__(self, **kwargs):
|
|
377
|
-
logger.warning(
|
|
372
|
+
logger.warning(
|
|
373
|
+
"Voicebot is deprecated, use VoiceBot instead. This class will be removed in a future release."
|
|
374
|
+
)
|
|
378
375
|
super().__init__(**kwargs)
|
|
@@ -14,35 +14,37 @@ class LivekitProtocol(Protocol):
|
|
|
14
14
|
self.remote = remote
|
|
15
15
|
self.topic = topic
|
|
16
16
|
|
|
17
|
-
|
|
18
17
|
async def __aenter__(self):
|
|
19
18
|
self.room.on("data_received", self._on_data_packet)
|
|
20
19
|
return await super().__aenter__()
|
|
21
|
-
|
|
20
|
+
|
|
22
21
|
async def __aexit__(self, exc_type, exc, tb):
|
|
23
22
|
self.room.off("data_received", self._on_data_packet)
|
|
24
|
-
|
|
25
|
-
return await super().__aexit__(exc_type, exc, tb)
|
|
26
23
|
|
|
27
|
-
|
|
24
|
+
return await super().__aexit__(exc_type, exc, tb)
|
|
28
25
|
|
|
29
|
-
|
|
26
|
+
async def send_packet(self, data: bytes) -> None:
|
|
27
|
+
logger.info(
|
|
28
|
+
"sending data packet %s %s to %s",
|
|
29
|
+
self.topic,
|
|
30
|
+
self.remote.identity,
|
|
31
|
+
self.room.remote_participants[self.remote.identity].sid,
|
|
32
|
+
)
|
|
30
33
|
|
|
31
34
|
await self.local.publish_data(
|
|
32
35
|
payload=data,
|
|
33
|
-
topic
|
|
34
|
-
reliable
|
|
35
|
-
destination_identities
|
|
36
|
+
topic=self.topic,
|
|
37
|
+
reliable=True,
|
|
38
|
+
destination_identities=[self.remote.identity],
|
|
36
39
|
)
|
|
37
40
|
|
|
38
41
|
def _on_data_packet(self, evt: rtc.DataPacket):
|
|
39
|
-
|
|
40
42
|
if self.remote != evt.participant:
|
|
41
43
|
return
|
|
42
44
|
|
|
43
|
-
logger.info(
|
|
45
|
+
logger.info(
|
|
46
|
+
"received data packet %s from %s", evt.topic, evt.participant.identity
|
|
47
|
+
)
|
|
44
48
|
|
|
45
49
|
if evt.topic == self.topic:
|
|
46
50
|
self.receive_packet(evt.data)
|
|
47
|
-
|
|
48
|
-
|
|
@@ -15,60 +15,76 @@ import asyncio
|
|
|
15
15
|
|
|
16
16
|
logger = logging.getLogger(__name__)
|
|
17
17
|
|
|
18
|
+
|
|
18
19
|
@pytest.mark.asyncio
|
|
19
20
|
async def test_protocol():
|
|
21
|
+
url = os.getenv("LIVEKIT_URL")
|
|
22
|
+
api_key = os.getenv("LIVEKIT_API_KEY")
|
|
23
|
+
api_secret = os.getenv("LIVEKIT_API_SECRET")
|
|
24
|
+
|
|
25
|
+
token1 = (
|
|
26
|
+
api.AccessToken(api_key=api_key, api_secret=api_secret)
|
|
27
|
+
.with_identity("core:user.test.agent-send")
|
|
28
|
+
.with_name("Agent")
|
|
29
|
+
.with_kind("agent")
|
|
30
|
+
.with_grants(
|
|
31
|
+
api.VideoGrants(
|
|
32
|
+
can_update_own_metadata=True,
|
|
33
|
+
room_join=True,
|
|
34
|
+
room="test-process",
|
|
35
|
+
agent=True,
|
|
36
|
+
)
|
|
37
|
+
)
|
|
38
|
+
)
|
|
20
39
|
|
|
21
|
-
url = os.getenv('LIVEKIT_URL')
|
|
22
|
-
api_key = os.getenv('LIVEKIT_API_KEY')
|
|
23
|
-
api_secret = os.getenv('LIVEKIT_API_SECRET')
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
token1 = api.AccessToken(api_key=api_key, api_secret=api_secret) \
|
|
27
|
-
.with_identity('core:user.test.agent-send') \
|
|
28
|
-
.with_name("Agent") \
|
|
29
|
-
.with_kind("agent") \
|
|
30
|
-
.with_grants(api.VideoGrants(
|
|
31
|
-
can_update_own_metadata=True,
|
|
32
|
-
room_join=True,
|
|
33
|
-
room="test-process",
|
|
34
|
-
agent=True
|
|
35
|
-
))
|
|
36
|
-
|
|
37
40
|
jwt1 = token1.to_jwt()
|
|
38
41
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
.with_identity(
|
|
42
|
-
.with_name("Agent")
|
|
43
|
-
.with_kind("agent")
|
|
44
|
-
.with_grants(
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
42
|
+
token2 = (
|
|
43
|
+
api.AccessToken(api_key=api_key, api_secret=api_secret)
|
|
44
|
+
.with_identity("core:user.test.agent-recv")
|
|
45
|
+
.with_name("Agent")
|
|
46
|
+
.with_kind("agent")
|
|
47
|
+
.with_grants(
|
|
48
|
+
api.VideoGrants(
|
|
49
|
+
can_update_own_metadata=True,
|
|
50
|
+
room_join=True,
|
|
51
|
+
room="test-process",
|
|
52
|
+
agent=True,
|
|
53
|
+
)
|
|
54
|
+
)
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
jwt2 = token2.to_jwt()
|
|
58
|
+
|
|
59
|
+
room1 = rtc.Room()
|
|
54
60
|
await room1.connect(url=url, token=jwt1)
|
|
55
|
-
|
|
56
|
-
room2 = rtc.Room()
|
|
61
|
+
|
|
62
|
+
room2 = rtc.Room()
|
|
57
63
|
await room2.connect(url=url, token=jwt2)
|
|
58
|
-
|
|
64
|
+
|
|
59
65
|
topic = "test_topic"
|
|
60
66
|
|
|
61
67
|
while True:
|
|
62
68
|
await asyncio.sleep(0.1)
|
|
63
69
|
|
|
64
|
-
if
|
|
70
|
+
if (
|
|
71
|
+
room2.local_participant.identity in room1.remote_participants
|
|
72
|
+
and room1.local_participant.identity in room2.remote_participants
|
|
73
|
+
):
|
|
65
74
|
break
|
|
66
75
|
|
|
67
|
-
async with livekit_protocol.LivekitProtocol(
|
|
68
|
-
|
|
69
|
-
|
|
76
|
+
async with livekit_protocol.LivekitProtocol(
|
|
77
|
+
room=room1,
|
|
78
|
+
remote=room1.remote_participants[room2.local_participant.identity],
|
|
79
|
+
topic=topic,
|
|
80
|
+
) as proto1:
|
|
81
|
+
async with livekit_protocol.LivekitProtocol(
|
|
82
|
+
room=room2,
|
|
83
|
+
remote=room2.remote_participants[room1.local_participant.identity],
|
|
84
|
+
topic=topic,
|
|
85
|
+
) as proto2:
|
|
70
86
|
test_data_builder = bytearray()
|
|
71
|
-
for i in range(1024*1024):
|
|
87
|
+
for i in range(1024 * 1024):
|
|
72
88
|
test_data_builder.append(i % 255)
|
|
73
89
|
|
|
74
90
|
test_data = bytes(test_data_builder)
|
|
@@ -76,16 +92,17 @@ async def test_protocol():
|
|
|
76
92
|
done = asyncio.Future[bool]()
|
|
77
93
|
|
|
78
94
|
matches = 0
|
|
79
|
-
|
|
95
|
+
|
|
96
|
+
async def test_fn(protocol, id: int, type: str, data: bytes):
|
|
80
97
|
nonlocal matches
|
|
81
98
|
logger.info("Message received")
|
|
82
99
|
if test_data != data:
|
|
83
100
|
raise "data isn't equal"
|
|
84
|
-
matches+=1
|
|
101
|
+
matches += 1
|
|
85
102
|
|
|
86
103
|
if matches == 2:
|
|
87
104
|
done.set_result(True)
|
|
88
|
-
|
|
105
|
+
|
|
89
106
|
proto2.register_handler("test", test_fn)
|
|
90
107
|
|
|
91
108
|
await asyncio.sleep(1)
|
|
@@ -95,12 +112,5 @@ async def test_protocol():
|
|
|
95
112
|
|
|
96
113
|
await done
|
|
97
114
|
|
|
98
|
-
|
|
99
115
|
await room2.disconnect()
|
|
100
116
|
await room1.disconnect()
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
meshagent/livekit/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.
|
|
1
|
+
__version__ = "0.0.38"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: meshagent-livekit
|
|
3
|
+
Version: 0.0.38
|
|
4
|
+
Summary: Livekit support for Meshagent
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Project-URL: Documentation, https://docs.meshagent.com
|
|
7
|
+
Project-URL: Website, https://www.meshagent.com
|
|
8
|
+
Project-URL: Source, https://www.meshagent.com
|
|
9
|
+
Requires-Python: >=3.12
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: pytest~=8.4
|
|
13
|
+
Requires-Dist: pytest-asyncio~=0.26
|
|
14
|
+
Requires-Dist: strip-markdown~=1.3
|
|
15
|
+
Requires-Dist: livekit-api>=1.0
|
|
16
|
+
Requires-Dist: livekit-agents~=1.1
|
|
17
|
+
Requires-Dist: livekit-plugins-openai~=1.1
|
|
18
|
+
Requires-Dist: livekit-plugins-silero~=1.1
|
|
19
|
+
Requires-Dist: livekit-plugins-turn-detector~=1.1
|
|
20
|
+
Requires-Dist: meshagent-api~=0.0.38
|
|
21
|
+
Requires-Dist: meshagent-tools~=0.0.38
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
## MeshAgent LiveKit
|
|
25
|
+
The ``meshagent.livekit`` package equips agents with real-time audio and voice capabilities via the LiveKit SDK.
|
|
26
|
+
|
|
27
|
+
### VoiceBot
|
|
28
|
+
The ``VoiceBot`` agent handles two-way voice conversations, allowing users to interact with the agent verbally. Agents based on the ``VoiceBot`` class can be given the same tools as ``ChatBot``-based agents. This means you only need to write a tool once, and the same tool can be used across both text- and voice-based agents. Check out the [Build and Deploy a Voice Agent](https://docs.meshagent.com/agents/standard/buildanddeployvoicebot) example to learn how to create a simple Voice Agent without tools, then add built-in MeshAgent tools and custom tools to the agent.
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
### Learn more about MeshAgent on our website or check out the docs for additional examples!
|
|
32
|
+
|
|
33
|
+
**Website**: [www.meshagent.com](https://www.meshagent.com/)
|
|
34
|
+
|
|
35
|
+
**Documentation**: [docs.meshagent.com](https://docs.meshagent.com/)
|
|
36
|
+
|
|
37
|
+
---
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
meshagent/livekit/__init__.py,sha256=X78Z4yEg5XfkNKH0HiIdG4k1q5ktB-ampTuXHLNFrAw,58
|
|
2
|
+
meshagent/livekit/livekit_protocol.py,sha256=5Zu4ymLWEGt5SGXLNu94gOeyjnjhaV6uTS2FhSdODqs,1470
|
|
3
|
+
meshagent/livekit/livekit_protocol_test.py,sha256=o7yYxXad4tMazcxFkq44yW-A9tJ0Lk6WdZpG5ifxcU4,2980
|
|
4
|
+
meshagent/livekit/version.py,sha256=R5QxTjVaID7odO0eBWpOnyCjNQxBZ7cpyruM_NMOoDc,23
|
|
5
|
+
meshagent/livekit/agents/transcriber.py,sha256=oqfHBhBSwU62LbsO8WFiJg3Xoi4vkWlTFzgTxBP0erg,13297
|
|
6
|
+
meshagent/livekit/agents/voice.py,sha256=STgjMSqzUgV9UAmleOy1vkgRXP93MDSYgiOO6Lo0peU,11964
|
|
7
|
+
meshagent_livekit-0.0.38.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
8
|
+
meshagent_livekit-0.0.38.dist-info/METADATA,sha256=pVJlT13Hr3NnFwMSuA60PG_U_uZoPgWcwlepxZl9k_w,1721
|
|
9
|
+
meshagent_livekit-0.0.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
+
meshagent_livekit-0.0.38.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
11
|
+
meshagent_livekit-0.0.38.dist-info/RECORD,,
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: meshagent-livekit
|
|
3
|
-
Version: 0.0.37
|
|
4
|
-
Summary: Livekit support for Meshagent
|
|
5
|
-
License-Expression: Apache-2.0
|
|
6
|
-
Project-URL: Documentation, https://docs.meshagent.com
|
|
7
|
-
Project-URL: Website, https://www.meshagent.com
|
|
8
|
-
Project-URL: Source, https://www.meshagent.com
|
|
9
|
-
Requires-Python: >=3.12
|
|
10
|
-
Description-Content-Type: text/markdown
|
|
11
|
-
License-File: LICENSE
|
|
12
|
-
Requires-Dist: pytest~=8.3
|
|
13
|
-
Requires-Dist: pytest-asyncio~=0.26
|
|
14
|
-
Requires-Dist: strip-markdown~=1.3
|
|
15
|
-
Requires-Dist: livekit-api>=1.0
|
|
16
|
-
Requires-Dist: livekit-agents~=1.1
|
|
17
|
-
Requires-Dist: livekit-plugins-openai~=1.1
|
|
18
|
-
Requires-Dist: livekit-plugins-silero~=1.1
|
|
19
|
-
Requires-Dist: livekit-plugins-turn-detector~=1.1
|
|
20
|
-
Requires-Dist: meshagent-api~=0.0.37
|
|
21
|
-
Requires-Dist: meshagent-tools~=0.0.37
|
|
22
|
-
Dynamic: license-file
|
|
23
|
-
|
|
24
|
-
### Meshagent LiveKit
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
meshagent/livekit/__init__.py,sha256=8zLGg-DfQhnDl2Ky0n-zXpN-8e-g7iR0AcaI4l4Vvpk,32
|
|
2
|
-
meshagent/livekit/livekit_protocol.py,sha256=K9yP-qpxag5_7TXlKjFEx3cOJJJpYI_z6zGzFHoN1Hs,1421
|
|
3
|
-
meshagent/livekit/livekit_protocol_test.py,sha256=n_ZQjt7n4u7TM7eENzH8L0tw8LvypS_JHF_PuJ2o6h4,2836
|
|
4
|
-
meshagent/livekit/version.py,sha256=JaGEpJ5xP3R4j7pGgCziGajlIRjy1_NJdv_OaXPQius,22
|
|
5
|
-
meshagent/livekit/agents/transcriber.py,sha256=Dq1Ijx4gmA-0jQGM-f3w7X-JIZpkRCFDxWae9AOwz-k,12290
|
|
6
|
-
meshagent/livekit/agents/voice.py,sha256=CHbzuLdkxEcuHfC1skjl_0KHV-hmf8i6NWxV7AYTWyc,12348
|
|
7
|
-
meshagent_livekit-0.0.37.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
8
|
-
meshagent_livekit-0.0.37.dist-info/METADATA,sha256=tdVvbqxsQp0faK5xMyHqFUdLUd2nXywubcydiE2B9kM,790
|
|
9
|
-
meshagent_livekit-0.0.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
-
meshagent_livekit-0.0.37.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
11
|
-
meshagent_livekit-0.0.37.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|