meshagent-livekit 0.5.15__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of meshagent-livekit might be problematic. Click here for more details.
- meshagent/livekit/agents/meeting_transcriber.py +823 -0
- meshagent/livekit/version.py +1 -1
- {meshagent_livekit-0.5.15.dist-info → meshagent_livekit-0.6.0.dist-info}/METADATA +9 -9
- meshagent_livekit-0.6.0.dist-info/RECORD +12 -0
- meshagent_livekit-0.5.15.dist-info/RECORD +0 -11
- {meshagent_livekit-0.5.15.dist-info → meshagent_livekit-0.6.0.dist-info}/WHEEL +0 -0
- {meshagent_livekit-0.5.15.dist-info → meshagent_livekit-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {meshagent_livekit-0.5.15.dist-info → meshagent_livekit-0.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,823 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Awaitable, Callable, Coroutine, Iterable, Optional
|
|
7
|
+
|
|
8
|
+
from livekit import rtc
|
|
9
|
+
from livekit.agents import (
|
|
10
|
+
Agent,
|
|
11
|
+
AgentSession,
|
|
12
|
+
RoomInputOptions,
|
|
13
|
+
RoomIO,
|
|
14
|
+
RoomOutputOptions,
|
|
15
|
+
StopResponse,
|
|
16
|
+
llm,
|
|
17
|
+
utils,
|
|
18
|
+
)
|
|
19
|
+
from livekit.plugins import openai, silero
|
|
20
|
+
from meshagent.api import MeshDocument, SchemaRegistration, SchemaRegistry
|
|
21
|
+
from meshagent.agents import SingleRoomAgent
|
|
22
|
+
from meshagent.tools import RemoteToolkit, ToolContext, Tool
|
|
23
|
+
from meshagent.api.room_server_client import Requirement
|
|
24
|
+
from meshagent.livekit.agents.voice import VoiceConnection
|
|
25
|
+
from meshagent.agents.schemas.transcript import transcript_schema
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger("meeting_transcriber")
|
|
28
|
+
|
|
29
|
+
_shared_vad = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class TranscriptSession:
    """Wrap a transcript document and append speech segments to it."""

    # Document whose root element receives one ``segment`` child per utterance.
    document: MeshDocument

    def append_segment(
        self,
        *,
        participant_name: str,
        text: str,
        participant_id: Optional[str] = None,
        time: Optional[str] = None,
    ) -> bool:
        """Append one transcript segment to the document.

        Args:
            participant_name: Display name of the speaker; stored stripped
                (an empty string when missing).
            text: Transcribed text; surrounding whitespace is removed.
            participant_id: Optional speaker identity, stored only when truthy.
            time: Optional timestamp string, stored only when truthy.

        Returns:
            ``True`` when a segment was appended, ``False`` when ``text`` is
            empty or whitespace-only and nothing was written.
        """
        normalized_text = (text or "").strip()
        if not normalized_text:
            return False

        payload = {
            "participant_name": (participant_name or "").strip(),
            "text": normalized_text,
        }
        if participant_id:
            payload["participant_id"] = participant_id
        if time:
            payload["time"] = time

        self.document.root.append_child("segment", payload)
        return True
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _get_shared_vad():
    """Return the module-wide Silero VAD, loading it on the first call."""
    global _shared_vad
    if _shared_vad is not None:
        return _shared_vad
    # First use: load once and cache in the module-level slot.
    _shared_vad = silero.VAD.load()
    return _shared_vad
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class _ParticipantTranscriber(Agent):
    """Agent that forwards each completed user turn to a transcript callback.

    The agent is configured with OpenAI speech-to-text only; after handling a
    turn it raises ``StopResponse`` instead of producing a reply.
    """

    def __init__(
        self,
        *,
        participant_name: str,
        participant_id: Optional[str],
        on_transcript: Optional[Callable[..., Awaitable[None]]] = None,
    ):
        """Create the transcriber for one participant.

        Args:
            participant_name: Display name; falls back to
                ``"unknown participant"`` when empty.
            participant_id: Identity passed through to the callback.
            on_transcript: Optional async callback invoked with keyword
                arguments ``participant_id``, ``participant_name``,
                ``chat_ctx`` and ``new_message``.
        """
        super().__init__(
            instructions="not-needed",
            stt=openai.STT(),
        )
        self._participant_name = participant_name or "unknown participant"
        self._participant_id = participant_id
        self._on_transcript = on_transcript

    async def on_user_turn_completed(
        self, chat_ctx: llm.ChatContext, new_message: llm.ChatMessage
    ):
        """Log the finished turn, hand it to the callback, then stop the reply.

        Raises:
            StopResponse: Always, after the callback (if any) has completed.
        """
        user_transcript = new_message.text_content
        logger.info("%s -> %s", self._participant_name, user_transcript)
        # The callback is optional (defaults to None); awaiting None would
        # raise TypeError and skip the StopResponse below.
        if self._on_transcript is not None:
            await self._on_transcript(
                participant_id=self._participant_id,
                participant_name=self._participant_name,
                chat_ctx=chat_ctx,
                new_message=new_message,
            )
        raise StopResponse()
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class _MeetingManager:
    """Run one transcription ``AgentSession`` per remote participant of a room.

    Sessions are keyed by participant identity. A session is started when a
    participant connects and drained/closed when that participant disconnects
    or when the manager is closed.
    """

    def __init__(
        self,
        *,
        room: rtc.Room,
        vad,
        on_transcript: Optional[Callable[..., Awaitable[None]]] = None,
    ):
        """Store the room, the VAD handed to every session, and the callback."""
        self.room = room
        self._vad = vad
        self._sessions: dict[str, AgentSession] = {}
        # Background start/close tasks; kept so aclose() can cancel them.
        self._tasks: set[asyncio.Task] = set()
        self._closed = False
        self._on_transcript = on_transcript

    @staticmethod
    def _resolve_participant_identity(
        participant: rtc.RemoteParticipant,
    ) -> tuple[str, Optional[str]]:
        """Return ``(name, identity)`` for ``participant``."""
        return (
            participant.name,
            participant.identity,
        )

    def start(self):
        """Subscribe to participant connect/disconnect events of the room."""
        self.room.on("participant_connected", self.on_participant_connected)
        self.room.on("participant_disconnected", self.on_participant_disconnected)

    async def aclose(self):
        """Cancel pending work, close every session and unsubscribe from events."""
        self._closed = True
        try:
            await utils.aio.cancel_and_wait(*self._tasks)

            # Detach the sessions first so a repeated aclose() or a late
            # disconnect event cannot close the same session twice.
            sessions = list(self._sessions.values())
            self._sessions.clear()
            await asyncio.gather(
                *[self._close_session(session) for session in sessions]
            )
        finally:
            # Always unsubscribe, even when closing a session raised.
            self.room.off("participant_connected", self.on_participant_connected)
            self.room.off(
                "participant_disconnected", self.on_participant_disconnected
            )

    def on_participant_connected(self, participant: rtc.RemoteParticipant):
        """Start a transcription session for ``participant`` in the background."""
        if participant.identity in self._sessions:
            return

        if self._closed:
            logger.debug(
                "ignoring connect for %s because transcriber is closed",
                getattr(participant, "identity", None),
            )
            return

        readable_name, _ = self._resolve_participant_identity(participant)
        kind = getattr(participant, "kind", None)
        logger.info("participant connected: %s (kind=%s)", readable_name, kind)

        task = asyncio.create_task(self._start_session(participant))
        self._tasks.add(task)

        def on_task_done(done: asyncio.Task):
            self._tasks.discard(done)
            # result() re-raises on cancelled/failed tasks; inspect the
            # outcome first so the callback itself never raises.
            if done.cancelled():
                return
            exc = done.exception()
            if exc is not None:
                logger.error(
                    "failed to start session for %s: %s",
                    participant.identity,
                    exc,
                    exc_info=exc,
                )
                return
            self._sessions[participant.identity] = done.result()

        task.add_done_callback(on_task_done)

    def on_participant_disconnected(self, participant: rtc.RemoteParticipant):
        """Close the session of ``participant`` (if any) in the background."""
        readable_name, participant_id = self._resolve_participant_identity(participant)
        logger.info("participant disconnected: %s", readable_name)
        session = self._sessions.pop(participant.identity, None)
        if session is None:
            return

        logger.info("closing session for %s", participant_id or readable_name)
        task = asyncio.create_task(self._close_session(session))
        self._tasks.add(task)
        task.add_done_callback(lambda _: self._tasks.discard(task))

    async def _start_session(self, participant: rtc.RemoteParticipant) -> AgentSession:
        """Create, wire up and start the ``AgentSession`` for ``participant``."""
        if participant.identity in self._sessions:
            return self._sessions[participant.identity]

        display_name, participant_id = self._resolve_participant_identity(participant)
        logger.debug("creating session for %s (id=%s)", display_name, participant_id)

        session = AgentSession(
            vad=self._vad,
        )

        room_io = RoomIO(
            agent_session=session,
            room=self.room,
            participant=participant,
            input_options=RoomInputOptions(
                # text input is not supported for multiple room participants
                # if needed, register the text stream handler by yourself
                # and route the text to different sessions based on the participant identity
                text_enabled=False,
            ),
            output_options=RoomOutputOptions(
                transcription_enabled=True,
                audio_enabled=False,
            ),
        )
        await room_io.start()
        agent = _ParticipantTranscriber(
            participant_name=display_name,
            participant_id=participant_id,
            on_transcript=self._on_transcript,
        )
        await session.start(agent=agent)
        return session

    async def _close_session(self, sess: AgentSession) -> None:
        """Drain ``sess`` and then close it."""
        await sess.drain()
        await sess.aclose()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# Key identifying one transcription session: (breakout room, transcript path).
SessionKey = tuple[str, str]
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
@dataclass
class _SessionState:
    """Everything held for one active transcription session."""

    # Breakout room the session joined (may be None).
    breakout_room: Optional[str]
    # Path of the transcript document the session writes to.
    transcript_path: str
    # Voice connection entered for this session; exited when the session stops.
    voice_conn: VoiceConnection
    # Per-participant session manager bound to the connection's LiveKit room.
    manager: _MeetingManager
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class StartTranscriptionTool(Tool):
    """Tool ``start_transcription``: ask the transcriber to start a session."""

    def __init__(self, *, transcriber: "MeetingTranscriber"):
        """Bind the tool to ``transcriber`` and declare its input schema."""
        self.transcriber = transcriber
        # Both arguments are required strings; no extra properties allowed.
        properties = {
            "breakout_room": {
                "type": "string",
            },
            "path": {
                "type": "string",
            },
        }
        schema = {
            "type": "object",
            "required": ["breakout_room", "path"],
            "additionalProperties": False,
            "properties": properties,
        }
        super().__init__(name="start_transcription", input_schema=schema)

    async def execute(self, context: ToolContext, *, breakout_room: str, path: str):
        """Start transcription for ``breakout_room`` into ``path``."""
        await self.transcriber.start_transcription(
            breakout_room=breakout_room,
            path=path,
        )
        result = {"status": "started"}
        return result
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
class StopTranscriptionTool(Tool):
    """Tool ``stop_transcription``: ask the transcriber to stop a session."""

    def __init__(self, *, transcriber: "MeetingTranscriber"):
        """Bind the tool to ``transcriber`` and declare its input schema."""
        self.transcriber = transcriber
        # Both arguments are required strings; no extra properties allowed.
        properties = {
            "breakout_room": {
                "type": "string",
            },
            "path": {
                "type": "string",
            },
        }
        schema = {
            "type": "object",
            "required": ["breakout_room", "path"],
            "additionalProperties": False,
            "properties": properties,
        }
        super().__init__(name="stop_transcription", input_schema=schema)

    async def execute(self, context: ToolContext, *, breakout_room: str, path: str):
        """Stop transcription for ``breakout_room`` / ``path``."""
        await self.transcriber.stop_transcription(
            breakout_room=breakout_room,
            path=path,
        )
        result = {"status": "stopped"}
        return result
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class MeetingTranscriber(SingleRoomAgent):
|
|
294
|
+
_STATUS_ATTRIBUTE_KEY = "transcriber_status"
|
|
295
|
+
|
|
296
|
+
def __init__(self, name: str, requires: Optional[list[Requirement]] = None):
    """Initialise bookkeeping and the ``transcription`` toolkit."""
    super().__init__(name=name, requires=requires)

    # Session bookkeeping, keyed by (breakout room, transcript path).
    self._sessions: dict[SessionKey, _SessionState] = {}
    self._pending_sessions: set[SessionKey] = set()
    self._status_entries: dict[SessionKey, dict[str, Any]] = {}
    self._session_lock = asyncio.Lock()

    # Open transcript documents, keyed by document path.
    self._transcript_sessions: dict[str, TranscriptSession] = {}

    # Background tasks, plus the task registered for each session key.
    self._tasks: set[asyncio.Task] = set()
    self._session_tasks_by_key: dict[SessionKey, asyncio.Task] = {}

    # Stop requests that arrived while a session was still pending.
    self._deferred_stop_keys: set[SessionKey] = set()
    self._deferred_stop_reasons: dict[SessionKey, str] = {}

    start_tool = StartTranscriptionTool(transcriber=self)
    stop_tool = StopTranscriptionTool(transcriber=self)
    self._toolkit = RemoteToolkit(
        name="transcription",
        tools=[start_tool, stop_tool],
    )
|
|
317
|
+
|
|
318
|
+
@staticmethod
def _make_session_key(breakout_room: str, transcript_path: str) -> SessionKey:
    """Build the session key: breakout room first, transcript path second."""
    key = (breakout_room, transcript_path)
    return key
|
|
321
|
+
|
|
322
|
+
def _sessions_using_path(self, path: str) -> int:
    """Count the active sessions whose transcript path equals ``path``."""
    users = [
        state for state in self._sessions.values() if state.transcript_path == path
    ]
    return len(users)
|
|
326
|
+
|
|
327
|
+
async def _publish_status_attribute(self) -> None:
    """Publish all status entries as one attribute of the local participant.

    The payload is a nested mapping ``{breakout_room: {transcript_path:
    entry}}`` where each entry is a copy of the stored status without its
    ``breakout_room`` field. Does nothing when no room is attached; failures
    of the attribute update are logged and swallowed.
    """
    if not getattr(self, "room", None):
        return

    def _order(key: SessionKey):
        # Breakout rooms may be None; comparing None with str raises
        # TypeError, so sort None first and compare the rest as strings.
        breakout, path = key
        return (breakout is not None, breakout or "", path)

    payload: dict[str, dict[str, Any]] = {}
    for key in sorted(self._status_entries, key=_order):
        entry = self._status_entries[key]
        breakout_room, transcript_path = key
        entry_payload = dict(entry)
        # The breakout room is already the outer key of the payload.
        entry_payload.pop("breakout_room", None)
        breakout_bucket = payload.setdefault(breakout_room, {})
        breakout_bucket[transcript_path] = entry_payload
    try:
        await self.room.local_participant.set_attribute(
            self._STATUS_ATTRIBUTE_KEY,
            payload,
        )
    except (
        Exception
    ) as exc:  # pragma: no cover - attribute updates should not break flow
        logger.warning(
            "failed to update transcriber status attribute: %s", exc, exc_info=exc
        )
|
|
349
|
+
|
|
350
|
+
async def _update_status_entry(
    self,
    session_key: SessionKey,
    *,
    state: str,
    breakout_room: Optional[str],
    transcript_path: str,
    reason: Optional[str] = None,
    error: Optional[str] = None,
) -> None:
    """Create or refresh the status entry of a session and publish it.

    Args:
        session_key: Key of the session the entry belongs to.
        state: New state label; ``active`` is true only for ``"active"``.
        breakout_room: Breakout room recorded in the entry.
        transcript_path: Transcript path recorded in the entry.
        reason: Optional reason; an existing one is removed when ``None``.
        error: Optional error text; an existing one is removed when ``None``.
    """
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    entry = dict(self._status_entries.get(session_key, {}))
    # datetime.utcnow() is deprecated; take an aware UTC time and drop the
    # tzinfo so the string keeps the previous "<naive ISO>Z" format.
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    # created_at is written once; updated_at on every call.
    entry.setdefault("created_at", now)
    entry.update(
        {
            "session_id": f"{session_key[0]}::{session_key[1]}",
            "state": state,
            "active": state == "active",
            "breakout_room": breakout_room,
            "transcript_path": transcript_path,
            "updated_at": now,
        }
    )
    if reason is not None:
        entry["reason"] = reason
    else:
        entry.pop("reason", None)
    if error is not None:
        entry["error"] = error
    else:
        entry.pop("error", None)
    self._status_entries[session_key] = entry
    await self._publish_status_attribute()
|
|
384
|
+
|
|
385
|
+
async def _remove_status_entry(self, session_key: SessionKey) -> None:
    """Drop the status entry of ``session_key`` and republish, if it exists."""
    if session_key not in self._status_entries:
        # Nothing stored for this key: no change, so nothing to publish.
        return
    del self._status_entries[session_key]
    await self._publish_status_attribute()
|
|
389
|
+
|
|
390
|
+
def _spawn_task(
    self,
    coro: Coroutine[Any, Any, Any],
    *,
    session_key: Optional[SessionKey] = None,
) -> None:
    """Run ``coro`` as a tracked background task.

    Args:
        coro: Coroutine to schedule.
        session_key: When given, the task is also registered in
            ``_session_tasks_by_key`` under this key until it finishes.

    Failures of the task are logged, never raised to the caller.
    """
    task = asyncio.create_task(coro)
    self._tasks.add(task)
    if session_key is not None:
        self._session_tasks_by_key[session_key] = task

    def _on_done(t: asyncio.Task):
        self._tasks.discard(t)
        # Only unregister when the slot still belongs to this task; a newer
        # task registered under the same key must stay reachable.
        if (
            session_key is not None
            and self._session_tasks_by_key.get(session_key) is t
        ):
            del self._session_tasks_by_key[session_key]
        try:
            t.result()
        except asyncio.CancelledError:
            logger.debug("transcriber task cancelled")
        except Exception as exc:
            logger.error("transcriber task failed: %s", exc, exc_info=exc)

    task.add_done_callback(_on_done)
|
|
413
|
+
|
|
414
|
+
@staticmethod
def _default_transcript_document_path() -> str:
    """Return ``TRANSCRIPT_DOCUMENT_PATH`` (stripped) or the built-in default."""
    configured = os.getenv("TRANSCRIPT_DOCUMENT_PATH", "").strip()
    if configured:
        return configured
    return "transcript.transcript"
|
|
418
|
+
|
|
419
|
+
async def _release_transcript_session(self, path: str) -> None:
    """Forget the transcript session for ``path`` and close its document.

    ``aclose()`` is tried first; ``close()`` is used when the document has no
    callable ``aclose`` or when ``aclose()`` raised. Errors are logged at
    debug level and never propagated.
    """
    released = self._transcript_sessions.pop(path, None)
    if not released:
        return
    document = released.document

    async_close = getattr(document, "aclose", None)
    if callable(async_close):
        try:
            await async_close()
        except Exception as exc:
            logger.debug(
                "failed to aclose transcript document: %s", exc, exc_info=exc
            )
        else:
            # Closed cleanly; the close() fallback is not needed.
            return

    sync_close = getattr(document, "close", None)
    if not callable(sync_close):
        return
    try:
        outcome = sync_close()
        # close() may itself hand back a coroutine; await it if so.
        if asyncio.iscoroutine(outcome):
            await outcome
    except Exception as exc:
        logger.debug(
            "failed to close transcript document: %s", exc, exc_info=exc
        )
|
|
443
|
+
|
|
444
|
+
async def _ensure_transcript_session(self, path: str) -> TranscriptSession:
    """Return the cached transcript session for ``path``, opening it if needed."""
    cached = self._transcript_sessions.get(path)
    if cached is not None:
        return cached

    # Not open yet: open (creating if necessary) and remember the document.
    document = await self.room.sync.open(path=path, create=True)
    opened = TranscriptSession(document=document)
    self._transcript_sessions[path] = opened
    logger.info("transcript document ready at %s", path)
    return opened
|
|
452
|
+
|
|
453
|
+
async def start(self, *, room):
    """Start the agent and its toolkit in ``room`` and publish initial state.

    Args:
        room: Room passed on to the base class and the toolkit.
    """
    await super().start(room=room)
    await self._toolkit.start(room=room)
    # Mark the local participant with the ``supports_voice`` attribute.
    await room.local_participant.set_attribute("supports_voice", True)
    await room.messaging.enable()
    # Publish the current status map under the status attribute key.
    await self._publish_status_attribute()
|
|
459
|
+
|
|
460
|
+
async def start_transcription(self, *, breakout_room: Optional[str], path: str):
    """Begin transcribing ``breakout_room`` into the document at ``path``.

    The session is started in a background task; this method returns as soon
    as the task is scheduled. A request for a session that is already active,
    pending, or whose start task is still in flight is ignored with a warning.
    """
    session_key = self._make_session_key(breakout_room, path)
    # A freshly spawned start task marks the key as pending only once it
    # runs, so the task registry is checked too; otherwise two quick calls
    # would both get past this guard.
    already_requested = (
        session_key in self._sessions
        or session_key in self._pending_sessions
        or session_key in self._session_tasks_by_key
    )
    if already_requested:
        logger.warning(
            "start_transcription for breakout=%s transcript=%s ignored; session already active or pending",
            breakout_room,
            path,
        )
        return
    logger.info(
        "transcription starting (breakout=%s transcript=%s)",
        breakout_room,
        path,
    )
    self._spawn_task(
        self._start_transcription_session(
            session_key=session_key,
            breakout_room=breakout_room,
            transcript_path=path,
        ),
        session_key=session_key,
    )
|
|
482
|
+
|
|
483
|
+
async def stop_transcription(self, *, breakout_room: Optional[str], path: str):
    """Stop every session matching ``breakout_room`` and ``path``.

    Matching is done against the status entries (``breakout_room=None``
    matches any breakout room). For each match:

    * an active session is stopped in a background task;
    * a pending session is flagged so it is stopped once it becomes ready,
      and its status is set to ``closing``;
    * a leftover entry of a session that ended in ``error`` or ``cancelled``
      is removed, because nothing else would ever clear it.
    """
    stop_reason = "stop_transcription"
    target_keys = self._find_session_keys(breakout_room, path)
    if not target_keys:
        logger.warning(
            "stop_transcription received but no matching sessions (breakout=%s transcript=%s)",
            breakout_room,
            path,
        )
        return
    logger.info(
        "stop_transcription received (breakout=%s transcript=%s); stopping %d session(s)",
        breakout_room,
        path,
        len(target_keys),
    )
    for key in target_keys:
        if key in self._sessions:
            self._spawn_task(
                self._stop_transcription_session(session_key=key, reason=stop_reason)
            )
            continue

        is_pending = key in self._pending_sessions
        if is_pending:
            # Picked up by the start task once the session is ready.
            self._deferred_stop_keys.add(key)
            self._deferred_stop_reasons[key] = stop_reason

        entry = self._status_entries.get(key)
        if not entry:
            continue

        if not is_pending and entry.get("state") in ("error", "cancelled"):
            # No session exists that could ever finish "closing".
            self._spawn_task(self._remove_status_entry(key))
            continue

        self._spawn_task(
            self._update_status_entry(
                key,
                state="closing",
                breakout_room=entry["breakout_room"],
                transcript_path=entry["transcript_path"],
                reason=stop_reason,
            )
        )
|
|
532
|
+
|
|
533
|
+
def _find_session_keys(
    self,
    breakout_room: Optional[str],
    transcript_path: Optional[str],
) -> list[SessionKey]:
    """Return the status-entry keys matching both filters.

    A filter that is ``None`` matches everything. The breakout room is
    compared with the first key element, the transcript path with the
    entry's ``transcript_path`` value.
    """
    return [
        key
        for key, entry in self._status_entries.items()
        if (breakout_room is None or key[0] == breakout_room)
        and (
            transcript_path is None
            or entry.get("transcript_path") == transcript_path
        )
    ]
|
|
549
|
+
|
|
550
|
+
async def _teardown_failed_start(
    self,
    *,
    session_key: SessionKey,
    transcript_path: str,
    manager: Optional[_MeetingManager],
    voice_conn: Optional[VoiceConnection],
    close_error_msg: str,
    disconnect_error_msg: str,
) -> None:
    """Undo a partially started session: unregister it and release resources."""
    async with self._session_lock:
        self._pending_sessions.discard(session_key)
        self._sessions.pop(session_key, None)
    if manager:
        try:
            await manager.aclose()
        except Exception as close_exc:
            logger.error(close_error_msg, close_exc, exc_info=close_exc)
    if voice_conn:
        try:
            await voice_conn.__aexit__(None, None, None)
        except Exception as disconnect_exc:
            logger.error(
                disconnect_error_msg, disconnect_exc, exc_info=disconnect_exc
            )
    # Close the transcript document once no remaining session writes to it.
    if self._sessions_using_path(transcript_path) == 0:
        await self._release_transcript_session(transcript_path)

async def _start_transcription_session(
    self,
    *,
    session_key: SessionKey,
    breakout_room: Optional[str],
    transcript_path: str,
) -> None:
    """Join the breakout room and start transcribing its participants.

    Status moves ``connecting`` -> ``active`` on success, or to
    ``cancelled`` / ``error`` when startup is cancelled or fails (the
    exception is re-raised after cleanup). A stop request received while
    the session was pending is honoured as soon as the session is ready.

    Args:
        session_key: Key under which the session is registered.
        breakout_room: Breakout room handed to ``VoiceConnection``.
        transcript_path: Path of the transcript document to write to.
    """
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    async with self._session_lock:
        if session_key in self._sessions or session_key in self._pending_sessions:
            return
        self._pending_sessions.add(session_key)

    voice_conn: Optional[VoiceConnection] = None
    manager: Optional[_MeetingManager] = None

    try:
        # Inside the try block so that a cancellation while publishing the
        # status still releases the pending marker in ``finally``.
        await self._update_status_entry(
            session_key,
            state="connecting",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
        )

        voice_conn = VoiceConnection(room=self.room, breakout_room=breakout_room)
        logger.info(
            "joining breakout %s (transcript=%s)",
            breakout_room,
            transcript_path,
        )
        await voice_conn.__aenter__()

        livekit_room = voice_conn.livekit_room
        if livekit_room is None:
            raise RuntimeError("VoiceConnection did not return a LiveKit room")

        await self._ensure_transcript_session(transcript_path)

        async def _handle_transcript(
            participant_id: str,
            participant_name: str,
            chat_ctx: llm.ChatContext,
            new_message: llm.ChatMessage,
        ):
            ts = await self._ensure_transcript_session(transcript_path)
            stored = ts.append_segment(
                participant_name=participant_name,
                text=new_message.text_content,
                participant_id=participant_id,
                # Aware clock with tzinfo dropped keeps the "<naive ISO>Z"
                # format without the deprecated datetime.utcnow().
                time=datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
                + "Z",
            )
            if not stored:
                # append_segment only declines empty/whitespace-only text.
                logger.debug("empty transcript skipped for %s", participant_name)

        manager = _MeetingManager(
            room=livekit_room,
            vad=_get_shared_vad(),
            on_transcript=_handle_transcript,
        )
        manager.start()

        # Participants already in the room produce no connect event, so
        # they are announced to the manager by hand.
        remotes = getattr(livekit_room, "remote_participants", None) or ()
        if isinstance(remotes, dict):
            participant_iter: Iterable[rtc.RemoteParticipant] = remotes.values()
        else:
            participant_iter = remotes

        participants = [p for p in participant_iter if p is not None]

        for participant in participants:
            try:
                manager.on_participant_connected(participant)
            except Exception as exc:
                logger.error(
                    "failed starting session for %s: %s",
                    getattr(participant, "identity", "?"),
                    exc,
                    exc_info=exc,
                )

        session_state = _SessionState(
            breakout_room=breakout_room,
            transcript_path=transcript_path,
            voice_conn=voice_conn,
            manager=manager,
        )

        async with self._session_lock:
            self._sessions[session_key] = session_state
            self._pending_sessions.discard(session_key)

        await self._update_status_entry(
            session_key,
            state="active",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
        )

    except asyncio.CancelledError:
        await self._teardown_failed_start(
            session_key=session_key,
            transcript_path=transcript_path,
            manager=manager,
            voice_conn=voice_conn,
            close_error_msg="failed closing cancelled transcriber: %s",
            disconnect_error_msg="failed disconnecting cancelled livekit: %s",
        )
        await self._update_status_entry(
            session_key,
            state="cancelled",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
            reason="cancelled",
        )
        raise
    except Exception as exc:
        await self._teardown_failed_start(
            session_key=session_key,
            transcript_path=transcript_path,
            manager=manager,
            voice_conn=voice_conn,
            close_error_msg="failed closing multi-user transcriber: %s",
            disconnect_error_msg="failed disconnecting livekit: %s",
        )
        await self._update_status_entry(
            session_key,
            state="error",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
            error=str(exc),
        )
        raise
    else:
        logger.info(
            "ready to transcribe breakout=%s transcript=%s (participants=%d)",
            breakout_room,
            transcript_path,
            len(participants),
        )
        if session_key in self._deferred_stop_keys:
            reason = self._deferred_stop_reasons.pop(session_key, "stop_requested")
            self._deferred_stop_keys.discard(session_key)
            self._spawn_task(
                self._stop_transcription_session(
                    session_key=session_key, reason=reason
                )
            )
    finally:
        async with self._session_lock:
            self._pending_sessions.discard(session_key)
        # A deferred stop is consumed above on success; on cancel/error it
        # must not linger, or a later session with this key would be
        # stopped the moment it becomes ready.
        self._deferred_stop_keys.discard(session_key)
        self._deferred_stop_reasons.pop(session_key, None)
|
|
727
|
+
|
|
728
|
+
async def _stop_transcription_session(
    self,
    *,
    session_key: SessionKey,
    reason: str,
    suppress_log: bool = False,
) -> None:
    """Tear down the session registered under ``session_key``.

    When no session is registered only the status entry is removed.
    Otherwise the status is set to ``closing``, the manager and the voice
    connection are shut down (errors are logged, not raised), the transcript
    document is released once unused, and the status entry plus any
    deferred-stop markers are dropped.

    Args:
        session_key: Key of the session to stop.
        reason: Reason stored in the ``closing`` status and the final log.
        suppress_log: Skip the final "closed livekit connection" log line.
    """
    async with self._session_lock:
        state = self._sessions.pop(session_key, None)

    if not state:
        await self._remove_status_entry(session_key)
        return

    room_name = state.breakout_room
    doc_path = state.transcript_path

    await self._update_status_entry(
        session_key,
        state="closing",
        breakout_room=room_name,
        transcript_path=doc_path,
        reason=reason,
    )

    meeting_manager = state.manager
    if meeting_manager:
        try:
            await meeting_manager.aclose()
        except Exception as exc:
            logger.error(
                "failed closing multi-user transcriber: %s", exc, exc_info=exc
            )

    connection = state.voice_conn
    if connection:
        try:
            await connection.__aexit__(None, None, None)
        except Exception as exc:
            logger.error("failed disconnecting livekit: %s", exc, exc_info=exc)

    # Release the document only when no other session still writes to it.
    if self._sessions_using_path(doc_path) == 0:
        await self._release_transcript_session(doc_path)

    await self._remove_status_entry(session_key)
    self._deferred_stop_keys.discard(session_key)
    self._deferred_stop_reasons.pop(session_key, None)

    if suppress_log:
        return
    logger.info(
        "closed livekit connection (breakout=%s transcript=%s reason=%s)",
        room_name,
        doc_path,
        reason,
    )
|
|
781
|
+
|
|
782
|
+
async def _stop_all_sessions(self, *, reason: str) -> None:
    """Stop every active session and cancel every pending one.

    Active sessions are stopped one after another. Pending sessions are
    flagged for a deferred stop, their status is set to ``closing`` and the
    task registered for their key is cancelled.
    """
    for active_key in list(self._sessions.keys()):
        await self._stop_transcription_session(
            session_key=active_key, reason=reason, suppress_log=True
        )

    for pending_key in list(self._pending_sessions):
        self._deferred_stop_keys.add(pending_key)
        self._deferred_stop_reasons[pending_key] = reason

        status = self._status_entries.get(pending_key)
        if status:
            await self._update_status_entry(
                pending_key,
                state="closing",
                breakout_room=status["breakout_room"],
                transcript_path=status["transcript_path"],
                reason=reason,
            )

        start_task = self._session_tasks_by_key.get(pending_key)
        if start_task:
            start_task.cancel()
|
|
805
|
+
|
|
806
|
+
async def stop(self):
    """Stop the toolkit and all sessions, clear the status, stop the agent."""
    await self._toolkit.stop()
    await self._stop_all_sessions(reason="agent_stop")
    if self._tasks:
        # Wait for remaining background tasks; their exceptions are
        # collected rather than raised here.
        await asyncio.gather(*list(self._tasks), return_exceptions=True)
    # Publish an empty status map before the base class stops.
    self._status_entries.clear()
    await self._publish_status_attribute()
    await super().stop()
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
class TranscriptRegistry(SchemaRegistry):
    """Schema registry that registers the transcript schema."""

    def __init__(self):
        """Register ``transcript_schema`` under the name ``transcript``."""
        schema_name = "transcript"
        registry_name = f"meshagent.schema.{schema_name}"
        registration = SchemaRegistration(name=schema_name, schema=transcript_schema)
        super().__init__(
            name=registry_name,
            validate_webhook_secret=False,
            schemas=[registration],
        )
|
meshagent/livekit/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.6.0"
|
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: meshagent-livekit
|
|
3
|
-
Version: 0.5.15
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Livekit support for Meshagent
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
Project-URL: Documentation, https://docs.meshagent.com
|
|
7
7
|
Project-URL: Website, https://www.meshagent.com
|
|
8
8
|
Project-URL: Source, https://www.meshagent.com
|
|
9
|
-
Requires-Python: >=3.
|
|
9
|
+
Requires-Python: >=3.13
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
License-File: LICENSE
|
|
12
12
|
Requires-Dist: pytest~=8.4
|
|
13
13
|
Requires-Dist: pytest-asyncio~=0.26
|
|
14
14
|
Requires-Dist: strip-markdown~=1.3
|
|
15
15
|
Requires-Dist: livekit-api>=1.0
|
|
16
|
-
Requires-Dist: livekit-agents~=1.
|
|
17
|
-
Requires-Dist: livekit-plugins-openai~=1.
|
|
18
|
-
Requires-Dist: livekit-plugins-silero~=1.
|
|
19
|
-
Requires-Dist: livekit-plugins-turn-detector~=1.
|
|
20
|
-
Requires-Dist: meshagent-api~=0.
|
|
21
|
-
Requires-Dist: meshagent-tools~=0.
|
|
16
|
+
Requires-Dist: livekit-agents~=1.2
|
|
17
|
+
Requires-Dist: livekit-plugins-openai~=1.2
|
|
18
|
+
Requires-Dist: livekit-plugins-silero~=1.2
|
|
19
|
+
Requires-Dist: livekit-plugins-turn-detector~=1.2
|
|
20
|
+
Requires-Dist: meshagent-api~=0.6.0
|
|
21
|
+
Requires-Dist: meshagent-tools~=0.6.0
|
|
22
22
|
Dynamic: license-file
|
|
23
23
|
|
|
24
24
|
# [Meshagent](https://www.meshagent.com)
|
|
@@ -27,7 +27,7 @@ Dynamic: license-file
|
|
|
27
27
|
The ``meshagent.livekit`` package equips agents with real-time audio and voice capabilities via the LiveKit SDK.
|
|
28
28
|
|
|
29
29
|
### VoiceBot
|
|
30
|
-
The ``VoiceBot`` agent handles two-way voice conversations allowing users to interact with the agent verbally. Agents based on the ``VoiceBot`` class can be given the same tools as ``ChatBot`` based agents. This means you only need to write a tool once and the same tool can be used across both text and voice based agents. Check out the [Build
|
|
30
|
+
The ``VoiceBot`` agent handles two-way voice conversations allowing users to interact with the agent verbally. Agents based on the ``VoiceBot`` class can be given the same tools as ``ChatBot`` based agents. This means you only need to write a tool once and the same tool can be used across both text and voice based agents. Check out the [Build a Voice Agent](https://docs.meshagent.com/agents/standard/buildanddeployvoicebot) example to learn how to create a simple Voice Agent without tools then add built-in MeshAgent tools and custom tools to the agent.
|
|
31
31
|
|
|
32
32
|
---
|
|
33
33
|
### Learn more about MeshAgent on our website or check out the docs for additional examples!
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
meshagent/livekit/__init__.py,sha256=X78Z4yEg5XfkNKH0HiIdG4k1q5ktB-ampTuXHLNFrAw,58
|
|
2
|
+
meshagent/livekit/livekit_protocol.py,sha256=5Zu4ymLWEGt5SGXLNu94gOeyjnjhaV6uTS2FhSdODqs,1470
|
|
3
|
+
meshagent/livekit/livekit_protocol_test.py,sha256=o7yYxXad4tMazcxFkq44yW-A9tJ0Lk6WdZpG5ifxcU4,2980
|
|
4
|
+
meshagent/livekit/version.py,sha256=cID1jLnC_vj48GgMN6Yb1FA3JsQ95zNmCHmRYE8TFhY,22
|
|
5
|
+
meshagent/livekit/agents/meeting_transcriber.py,sha256=UFJaMR9-FDL6kE_2sjSvKNUSJQ5yEKATsHvL2047yew,29039
|
|
6
|
+
meshagent/livekit/agents/transcriber.py,sha256=S992oVVBt3ShWDQQWprLjyl6Yh0hyNRd8d3qCmg_toU,5795
|
|
7
|
+
meshagent/livekit/agents/voice.py,sha256=STgjMSqzUgV9UAmleOy1vkgRXP93MDSYgiOO6Lo0peU,11964
|
|
8
|
+
meshagent_livekit-0.6.0.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
9
|
+
meshagent_livekit-0.6.0.dist-info/METADATA,sha256=znxygEPylhD8Ol9KMxJH9p3IPUHJO_fyJdjOVk_mvTY,1749
|
|
10
|
+
meshagent_livekit-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
meshagent_livekit-0.6.0.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
12
|
+
meshagent_livekit-0.6.0.dist-info/RECORD,,
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
meshagent/livekit/__init__.py,sha256=X78Z4yEg5XfkNKH0HiIdG4k1q5ktB-ampTuXHLNFrAw,58
|
|
2
|
-
meshagent/livekit/livekit_protocol.py,sha256=5Zu4ymLWEGt5SGXLNu94gOeyjnjhaV6uTS2FhSdODqs,1470
|
|
3
|
-
meshagent/livekit/livekit_protocol_test.py,sha256=o7yYxXad4tMazcxFkq44yW-A9tJ0Lk6WdZpG5ifxcU4,2980
|
|
4
|
-
meshagent/livekit/version.py,sha256=L6LoKMlJx-n68Agaz17S4PydD7S5Z4lwW3o79dy3l1c,23
|
|
5
|
-
meshagent/livekit/agents/transcriber.py,sha256=S992oVVBt3ShWDQQWprLjyl6Yh0hyNRd8d3qCmg_toU,5795
|
|
6
|
-
meshagent/livekit/agents/voice.py,sha256=STgjMSqzUgV9UAmleOy1vkgRXP93MDSYgiOO6Lo0peU,11964
|
|
7
|
-
meshagent_livekit-0.5.15.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
|
|
8
|
-
meshagent_livekit-0.5.15.dist-info/METADATA,sha256=UYlIztPHfXX6T6lVeQhLM_HD6W3Q6WtxLsDvwZPD9kE,1763
|
|
9
|
-
meshagent_livekit-0.5.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
10
|
-
meshagent_livekit-0.5.15.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
|
|
11
|
-
meshagent_livekit-0.5.15.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|