meshagent-livekit 0.6.0__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of meshagent-livekit might be problematic. Click here for more details.

Files changed (21)
  1. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/CHANGELOG.md +6 -0
  2. {meshagent_livekit-0.6.0/meshagent_livekit.egg-info → meshagent_livekit-0.6.2}/PKG-INFO +3 -3
  3. meshagent_livekit-0.6.2/meshagent/livekit/agents/meeting_transcriber.py +294 -0
  4. meshagent_livekit-0.6.2/meshagent/livekit/version.py +1 -0
  5. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2/meshagent_livekit.egg-info}/PKG-INFO +3 -3
  6. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent_livekit.egg-info/requires.txt +2 -2
  7. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/pyproject.toml +2 -2
  8. meshagent_livekit-0.6.0/meshagent/livekit/agents/meeting_transcriber.py +0 -823
  9. meshagent_livekit-0.6.0/meshagent/livekit/version.py +0 -1
  10. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/LICENSE +0 -0
  11. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/MANIFEST.in +0 -0
  12. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/README.md +0 -0
  13. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent/livekit/__init__.py +0 -0
  14. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent/livekit/agents/transcriber.py +0 -0
  15. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent/livekit/agents/voice.py +0 -0
  16. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent/livekit/livekit_protocol.py +0 -0
  17. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent/livekit/livekit_protocol_test.py +0 -0
  18. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent_livekit.egg-info/SOURCES.txt +0 -0
  19. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent_livekit.egg-info/dependency_links.txt +0 -0
  20. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/meshagent_livekit.egg-info/top_level.txt +0 -0
  21. {meshagent_livekit-0.6.0 → meshagent_livekit-0.6.2}/setup.cfg +0 -0
@@ -1,3 +1,9 @@
1
+ ## [0.6.2]
2
+ - Stability
3
+
4
+ ## [0.6.1]
5
+ - Stability
6
+
1
7
  ## [0.6.0]
2
8
  - Stability
3
9
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshagent-livekit
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: Livekit support for Meshagent
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Documentation, https://docs.meshagent.com
@@ -17,8 +17,8 @@ Requires-Dist: livekit-agents~=1.2
17
17
  Requires-Dist: livekit-plugins-openai~=1.2
18
18
  Requires-Dist: livekit-plugins-silero~=1.2
19
19
  Requires-Dist: livekit-plugins-turn-detector~=1.2
20
- Requires-Dist: meshagent-api~=0.6.0
21
- Requires-Dist: meshagent-tools~=0.6.0
20
+ Requires-Dist: meshagent-api~=0.6.2
21
+ Requires-Dist: meshagent-tools~=0.6.2
22
22
  Dynamic: license-file
23
23
 
24
24
  # [Meshagent](https://www.meshagent.com)
@@ -0,0 +1,294 @@
1
+ import asyncio
2
+ import logging
3
+
4
+ from typing import Optional
5
+
6
+ from datetime import datetime, timezone
7
+ from livekit import rtc
8
+ from livekit.agents import (
9
+ Agent,
10
+ AgentSession,
11
+ RoomInputOptions,
12
+ RoomIO,
13
+ RoomOutputOptions,
14
+ StopResponse,
15
+ llm,
16
+ utils,
17
+ )
18
+ from livekit.plugins import openai, silero
19
+ from meshagent.api import MeshDocument, SchemaRegistration, SchemaRegistry
20
+ from meshagent.agents import SingleRoomAgent
21
+ from meshagent.tools import RemoteToolkit, ToolContext, Tool
22
+ from meshagent.api.room_server_client import Requirement
23
+ from meshagent.livekit.agents.voice import VoiceConnection
24
+ from meshagent.agents.schemas.transcript import transcript_schema
25
+
26
+ logger = logging.getLogger("meeting_transcriber")
27
+
28
+
29
+ class StartTranscriptionTool(Tool):
30
+ def __init__(self, *, transcriber: "MeetingTranscriber"):
31
+ self.transcriber = transcriber
32
+ super().__init__(
33
+ name="start_transcription",
34
+ input_schema={
35
+ "type": "object",
36
+ "required": [
37
+ "breakout_room",
38
+ "path",
39
+ ],
40
+ "additionalProperties": False,
41
+ "properties": {
42
+ "breakout_room": {
43
+ "type": "string",
44
+ },
45
+ "path": {
46
+ "type": "string",
47
+ },
48
+ },
49
+ },
50
+ )
51
+
52
+ async def execute(self, context: ToolContext, *, breakout_room: str, path: str):
53
+ await self.transcriber.start_transcription(
54
+ breakout_room=breakout_room, path=path
55
+ )
56
+ return {"status": "started"}
57
+
58
+
59
+ class StopTranscriptionTool(Tool):
60
+ def __init__(self, *, transcriber: "MeetingTranscriber"):
61
+ self.transcriber = transcriber
62
+ super().__init__(
63
+ name="stop_transcription",
64
+ input_schema={
65
+ "type": "object",
66
+ "required": [
67
+ "breakout_room",
68
+ ],
69
+ "additionalProperties": False,
70
+ "properties": {
71
+ "breakout_room": {
72
+ "type": "string",
73
+ },
74
+ },
75
+ },
76
+ )
77
+
78
+ async def execute(self, context: ToolContext, *, breakout_room: str):
79
+ await self.transcriber.stop_transcription(
80
+ breakout_room=breakout_room,
81
+ )
82
+ return {"status": "stopped"}
83
+
84
+
85
+ class MeetingTranscriber(SingleRoomAgent):
86
+ def __init__(self, name: str, requires: Optional[list[Requirement]] = None):
87
+ super().__init__(
88
+ name=name,
89
+ requires=requires,
90
+ )
91
+ self._toolkit = RemoteToolkit(
92
+ name="transcription",
93
+ tools=[
94
+ StartTranscriptionTool(transcriber=self),
95
+ StopTranscriptionTool(transcriber=self),
96
+ ],
97
+ )
98
+ self._vad = None
99
+ self._transcription_tasks = dict[str, tuple[asyncio.Task, asyncio.Future]]()
100
+
101
+ async def start(self, *, room):
102
+ await super().start(room=room)
103
+ await self._toolkit.start(room=room)
104
+ await room.local_participant.set_attribute("supports_voice", True)
105
+ await room.messaging.enable()
106
+
107
+ self._vad = silero.VAD.load()
108
+
109
+ async def start_transcription(self, *, breakout_room: Optional[str], path: str):
110
+ stop_fut = asyncio.Future()
111
+
112
+ async def transcribe():
113
+ await self.room.local_participant.set_attribute(
114
+ f"transcribing.{breakout_room}", True
115
+ )
116
+
117
+ try:
118
+ async with VoiceConnection(
119
+ room=self.room, breakout_room=breakout_room
120
+ ) as conn:
121
+ doc = await self.room.sync.open(path=path, create=True)
122
+
123
+ transcriber = MultiUserTranscriber(conn, doc, self._vad)
124
+ transcriber.start()
125
+
126
+ for participant in conn.livekit_room.remote_participants.values():
127
+ # handle all existing participants
128
+ transcriber.on_participant_connected(participant)
129
+
130
+ await stop_fut
131
+
132
+ await self.room.local_participant.set_attribute(
133
+ f"transcribing.{breakout_room}", False
134
+ )
135
+
136
+ await self.room.sync.close(path=path)
137
+
138
+ await transcriber.aclose()
139
+ except Exception as ex:
140
+ logger.error(f"error during transcription {ex}", exc_info=ex)
141
+ pass
142
+
143
+ await self.room.local_participant.set_attribute(
144
+ f"transcribing.{breakout_room}", False
145
+ )
146
+
147
+ if breakout_room not in self._transcription_tasks:
148
+ self._transcription_tasks[breakout_room] = (
149
+ asyncio.create_task(transcribe()),
150
+ stop_fut,
151
+ )
152
+
153
+ async def stop_transcription(self, *, breakout_room: Optional[str]):
154
+ if breakout_room in self._transcription_tasks:
155
+ task, fut = self._transcription_tasks.pop(breakout_room)
156
+ fut.set_result(True)
157
+ await asyncio.gather(task)
158
+
159
+ async def stop(self):
160
+ await self._toolkit.stop()
161
+
162
+ tasks = []
163
+ for breakout_room, _ in self._transcription_tasks.items():
164
+ task, fut = self._transcription_tasks.pop(breakout_room)
165
+ fut.set_result(True)
166
+ tasks.append(task)
167
+
168
+ await asyncio.gather(*tasks)
169
+ await super().stop()
170
+
171
+
172
+ class TranscriptRegistry(SchemaRegistry):
173
+ def __init__(self):
174
+ name = "transcript"
175
+ super().__init__(
176
+ name=f"meshagent.schema.{name}",
177
+ validate_webhook_secret=False,
178
+ schemas=[SchemaRegistration(name=name, schema=transcript_schema)],
179
+ )
180
+
181
+
182
+ class Transcriber(Agent):
183
+ def __init__(self, *, participant, doc: MeshDocument):
184
+ super().__init__(
185
+ instructions="not-needed",
186
+ stt=openai.STT(),
187
+ )
188
+ self.doc = doc
189
+ self.participant = participant
190
+
191
+ async def on_user_turn_completed(
192
+ self, chat_ctx: llm.ChatContext, new_message: llm.ChatMessage
193
+ ):
194
+ segments = self.doc.root
195
+ segments.append_child(
196
+ "segment",
197
+ {
198
+ "text": new_message.text_content,
199
+ "participant_name": self.participant.name,
200
+ "participant_id": self.participant.sid,
201
+ "time": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
202
+ },
203
+ )
204
+ raise StopResponse()
205
+
206
+
207
+ class MultiUserTranscriber:
208
+ def __init__(self, ctx: VoiceConnection, doc: MeshDocument, vad):
209
+ self.ctx = ctx
210
+ self.doc = doc
211
+ self.vad = vad
212
+ self._sessions: dict[str, AgentSession] = {}
213
+ self._tasks: set[asyncio.Task] = set()
214
+
215
+ def start(self):
216
+ self.ctx.livekit_room.on("participant_connected", self.on_participant_connected)
217
+ self.ctx.livekit_room.on(
218
+ "participant_disconnected", self.on_participant_disconnected
219
+ )
220
+
221
+ async def aclose(self):
222
+ await utils.aio.cancel_and_wait(*self._tasks)
223
+
224
+ await asyncio.gather(
225
+ *[self._close_session(session) for session in self._sessions.values()]
226
+ )
227
+
228
+ self.ctx.livekit_room.off(
229
+ "participant_connected", self.on_participant_connected
230
+ )
231
+ self.ctx.livekit_room.off(
232
+ "participant_disconnected", self.on_participant_disconnected
233
+ )
234
+
235
+ def on_participant_connected(self, participant: rtc.RemoteParticipant):
236
+ if participant.identity in self._sessions:
237
+ return
238
+
239
+ logger.info(f"starting session for {participant.identity}")
240
+ task = asyncio.create_task(self._start_session(participant))
241
+ self._tasks.add(task)
242
+
243
+ def on_task_done(task: asyncio.Task):
244
+ try:
245
+ self._sessions[participant.identity] = task.result()
246
+ finally:
247
+ self._tasks.discard(task)
248
+
249
+ task.add_done_callback(on_task_done)
250
+
251
+ def on_participant_disconnected(self, participant: rtc.RemoteParticipant):
252
+ if (session := self._sessions.pop(participant.identity)) is None:
253
+ return
254
+
255
+ logger.info(f"closing session for {participant.identity}")
256
+ task = asyncio.create_task(self._close_session(session))
257
+ self._tasks.add(task)
258
+ task.add_done_callback(lambda _: self._tasks.discard(task))
259
+
260
+ async def _start_session(self, participant: rtc.RemoteParticipant) -> AgentSession:
261
+ if participant.identity in self._sessions:
262
+ return self._sessions[participant.identity]
263
+
264
+ session = AgentSession(
265
+ vad=self.vad,
266
+ )
267
+ room_io = RoomIO(
268
+ agent_session=session,
269
+ room=self.ctx.livekit_room,
270
+ participant=participant,
271
+ input_options=RoomInputOptions(
272
+ # text input is not supported for multiple room participants
273
+ # if needed, register the text stream handler by yourself
274
+ # and route the text to different sessions based on the participant identity
275
+ text_enabled=False,
276
+ delete_room_on_close=False,
277
+ ),
278
+ output_options=RoomOutputOptions(
279
+ transcription_enabled=True,
280
+ audio_enabled=False,
281
+ ),
282
+ )
283
+ await room_io.start()
284
+ await session.start(
285
+ agent=Transcriber(
286
+ participant=participant,
287
+ doc=self.doc,
288
+ )
289
+ )
290
+ return session
291
+
292
+ async def _close_session(self, sess: AgentSession) -> None:
293
+ await sess.drain()
294
+ await sess.aclose()
@@ -0,0 +1 @@
1
+ __version__ = "0.6.2"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshagent-livekit
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: Livekit support for Meshagent
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Documentation, https://docs.meshagent.com
@@ -17,8 +17,8 @@ Requires-Dist: livekit-agents~=1.2
17
17
  Requires-Dist: livekit-plugins-openai~=1.2
18
18
  Requires-Dist: livekit-plugins-silero~=1.2
19
19
  Requires-Dist: livekit-plugins-turn-detector~=1.2
20
- Requires-Dist: meshagent-api~=0.6.0
21
- Requires-Dist: meshagent-tools~=0.6.0
20
+ Requires-Dist: meshagent-api~=0.6.2
21
+ Requires-Dist: meshagent-tools~=0.6.2
22
22
  Dynamic: license-file
23
23
 
24
24
  # [Meshagent](https://www.meshagent.com)
@@ -6,5 +6,5 @@ livekit-agents~=1.2
6
6
  livekit-plugins-openai~=1.2
7
7
  livekit-plugins-silero~=1.2
8
8
  livekit-plugins-turn-detector~=1.2
9
- meshagent-api~=0.6.0
10
- meshagent-tools~=0.6.0
9
+ meshagent-api~=0.6.2
10
+ meshagent-tools~=0.6.2
@@ -18,8 +18,8 @@ dependencies = [
18
18
  "livekit-plugins-openai~=1.2",
19
19
  "livekit-plugins-silero~=1.2",
20
20
  "livekit-plugins-turn-detector~=1.2",
21
- "meshagent-api~=0.6.0",
22
- "meshagent-tools~=0.6.0"
21
+ "meshagent-api~=0.6.2",
22
+ "meshagent-tools~=0.6.2"
23
23
  ]
24
24
 
25
25
  [project.urls]
@@ -1,823 +0,0 @@
1
- import asyncio
2
- import logging
3
- import os
4
- from datetime import datetime
5
- from dataclasses import dataclass
6
- from typing import Any, Awaitable, Callable, Coroutine, Iterable, Optional
7
-
8
- from livekit import rtc
9
- from livekit.agents import (
10
- Agent,
11
- AgentSession,
12
- RoomInputOptions,
13
- RoomIO,
14
- RoomOutputOptions,
15
- StopResponse,
16
- llm,
17
- utils,
18
- )
19
- from livekit.plugins import openai, silero
20
- from meshagent.api import MeshDocument, SchemaRegistration, SchemaRegistry
21
- from meshagent.agents import SingleRoomAgent
22
- from meshagent.tools import RemoteToolkit, ToolContext, Tool
23
- from meshagent.api.room_server_client import Requirement
24
- from meshagent.livekit.agents.voice import VoiceConnection
25
- from meshagent.agents.schemas.transcript import transcript_schema
26
-
27
- logger = logging.getLogger("meeting_transcriber")
28
-
29
- _shared_vad = None
30
-
31
-
32
- @dataclass
33
- class TranscriptSession:
34
- document: MeshDocument
35
-
36
- def append_segment(
37
- self,
38
- *,
39
- participant_name: str,
40
- text: str,
41
- participant_id: Optional[str] = None,
42
- time: Optional[str] = None,
43
- ) -> bool:
44
- normalized_text = (text or "").strip()
45
- if not normalized_text:
46
- return False
47
-
48
- normalized_name = (participant_name or "").strip()
49
- alias_set: set[str] = set()
50
- if normalized_name:
51
- alias_set.add(normalized_name)
52
- if participant_id:
53
- alias_set.add(str(participant_id).strip())
54
- alias_set = {a for a in alias_set if a}
55
- if not alias_set:
56
- alias_set.add("")
57
-
58
- payload = {"participant_name": normalized_name, "text": normalized_text}
59
- if participant_id:
60
- payload["participant_id"] = participant_id
61
- if time:
62
- payload["time"] = time
63
-
64
- self.document.root.append_child("segment", payload)
65
- return True
66
-
67
-
68
- def _get_shared_vad():
69
- global _shared_vad
70
- if _shared_vad is None:
71
- _shared_vad = silero.VAD.load()
72
- return _shared_vad
73
-
74
-
75
- class _ParticipantTranscriber(Agent):
76
- def __init__(
77
- self,
78
- *,
79
- participant_name: str,
80
- participant_id: Optional[str],
81
- on_transcript: Optional[Callable[..., Awaitable[None]]] = None,
82
- ):
83
- super().__init__(
84
- instructions="not-needed",
85
- stt=openai.STT(),
86
- )
87
- self._participant_name = participant_name or "unknown participant"
88
- self._participant_id = participant_id
89
- self._on_transcript = on_transcript
90
-
91
- async def on_user_turn_completed(
92
- self, chat_ctx: llm.ChatContext, new_message: llm.ChatMessage
93
- ):
94
- user_transcript = new_message.text_content
95
- logger.info("%s -> %s", self._participant_name, user_transcript)
96
- await self._on_transcript(
97
- participant_id=self._participant_id,
98
- participant_name=self._participant_name,
99
- chat_ctx=chat_ctx,
100
- new_message=new_message,
101
- )
102
- raise StopResponse()
103
-
104
-
105
- class _MeetingManager:
106
- def __init__(
107
- self,
108
- *,
109
- room: rtc.Room,
110
- vad,
111
- on_transcript: Optional[Callable[..., Awaitable[None]]] = None,
112
- ):
113
- self.room = room
114
- self._vad = vad
115
- self._sessions: dict[str, AgentSession] = {}
116
- self._tasks: set[asyncio.Task] = set()
117
- self._closed = False
118
- self._on_transcript = on_transcript
119
-
120
- @staticmethod
121
- def _resolve_participant_identity(
122
- participant: rtc.RemoteParticipant,
123
- ) -> tuple[str, Optional[str]]:
124
- return (
125
- participant.name,
126
- participant.identity,
127
- )
128
-
129
- def start(self):
130
- self.room.on("participant_connected", self.on_participant_connected)
131
- self.room.on("participant_disconnected", self.on_participant_disconnected)
132
-
133
- async def aclose(self):
134
- self._closed = True
135
- await utils.aio.cancel_and_wait(*self._tasks)
136
-
137
- await asyncio.gather(
138
- *[self._close_session(session) for session in self._sessions.values()]
139
- )
140
-
141
- self.room.off("participant_connected", self.on_participant_connected)
142
- self.room.off("participant_disconnected", self.on_participant_disconnected)
143
-
144
- def on_participant_connected(self, participant: rtc.RemoteParticipant):
145
- if participant.identity in self._sessions:
146
- return
147
-
148
- if self._closed:
149
- logger.debug(
150
- "ignoring connect for %s because transcriber is closed",
151
- getattr(participant, "identity", None),
152
- )
153
- return
154
-
155
- readable_name, participant_id = self._resolve_participant_identity(participant)
156
- kind = getattr(participant, "kind", None)
157
- logger.info("participant connected: %s (kind=%s)", readable_name, kind)
158
-
159
- task = asyncio.create_task(self._start_session(participant))
160
- self._tasks.add(task)
161
-
162
- def on_task_done(task: asyncio.Task):
163
- try:
164
- self._sessions[participant.identity] = task.result()
165
- finally:
166
- self._tasks.discard(task)
167
-
168
- task.add_done_callback(on_task_done)
169
-
170
- def on_participant_disconnected(self, participant: rtc.RemoteParticipant):
171
- readable_name, participant_id = self._resolve_participant_identity(participant)
172
- logger.info("participant disconnected: %s", readable_name)
173
- session = self._sessions.pop(participant.identity, None)
174
- if session is None:
175
- return
176
-
177
- logger.info("closing session for %s", participant_id or readable_name)
178
- task = asyncio.create_task(self._close_session(session))
179
- self._tasks.add(task)
180
- task.add_done_callback(lambda _: self._tasks.discard(task))
181
-
182
- async def _start_session(self, participant: rtc.RemoteParticipant) -> AgentSession:
183
- if participant.identity in self._sessions:
184
- return self._sessions[participant.identity]
185
-
186
- display_name, participant_id = self._resolve_participant_identity(participant)
187
- logger.debug("creating session for %s (id=%s)", display_name, participant_id)
188
-
189
- session = AgentSession(
190
- vad=self._vad,
191
- )
192
-
193
- room_io = RoomIO(
194
- agent_session=session,
195
- room=self.room,
196
- participant=participant,
197
- input_options=RoomInputOptions(
198
- # text input is not supported for multiple room participants
199
- # if needed, register the text stream handler by yourself
200
- # and route the text to different sessions based on the participant identity
201
- text_enabled=False,
202
- ),
203
- output_options=RoomOutputOptions(
204
- transcription_enabled=True,
205
- audio_enabled=False,
206
- ),
207
- )
208
- await room_io.start()
209
- agent = _ParticipantTranscriber(
210
- participant_name=display_name,
211
- participant_id=participant_id,
212
- on_transcript=self._on_transcript,
213
- )
214
- await session.start(agent=agent)
215
- return session
216
-
217
- async def _close_session(self, sess: AgentSession) -> None:
218
- await sess.drain()
219
- await sess.aclose()
220
-
221
-
222
- SessionKey = tuple[str, str]
223
-
224
-
225
- @dataclass
226
- class _SessionState:
227
- breakout_room: Optional[str]
228
- transcript_path: str
229
- voice_conn: VoiceConnection
230
- manager: _MeetingManager
231
-
232
-
233
- class StartTranscriptionTool(Tool):
234
- def __init__(self, *, transcriber: "MeetingTranscriber"):
235
- self.transcriber = transcriber
236
- super().__init__(
237
- name="start_transcription",
238
- input_schema={
239
- "type": "object",
240
- "required": [
241
- "breakout_room",
242
- "path",
243
- ],
244
- "additionalProperties": False,
245
- "properties": {
246
- "breakout_room": {
247
- "type": "string",
248
- },
249
- "path": {
250
- "type": "string",
251
- },
252
- },
253
- },
254
- )
255
-
256
- async def execute(self, context: ToolContext, *, breakout_room: str, path: str):
257
- await self.transcriber.start_transcription(
258
- breakout_room=breakout_room, path=path
259
- )
260
- return {"status": "started"}
261
-
262
-
263
- class StopTranscriptionTool(Tool):
264
- def __init__(self, *, transcriber: "MeetingTranscriber"):
265
- self.transcriber = transcriber
266
- super().__init__(
267
- name="stop_transcription",
268
- input_schema={
269
- "type": "object",
270
- "required": [
271
- "breakout_room",
272
- "path",
273
- ],
274
- "additionalProperties": False,
275
- "properties": {
276
- "breakout_room": {
277
- "type": "string",
278
- },
279
- "path": {
280
- "type": "string",
281
- },
282
- },
283
- },
284
- )
285
-
286
- async def execute(self, context: ToolContext, *, breakout_room: str, path: str):
287
- await self.transcriber.stop_transcription(
288
- breakout_room=breakout_room, path=path
289
- )
290
- return {"status": "stopped"}
291
-
292
-
293
- class MeetingTranscriber(SingleRoomAgent):
294
- _STATUS_ATTRIBUTE_KEY = "transcriber_status"
295
-
296
- def __init__(self, name: str, requires: Optional[list[Requirement]] = None):
297
- super().__init__(
298
- name=name,
299
- requires=requires,
300
- )
301
- self._sessions: dict[SessionKey, _SessionState] = {}
302
- self._pending_sessions: set[SessionKey] = set()
303
- self._transcript_sessions: dict[str, TranscriptSession] = {}
304
- self._status_entries: dict[SessionKey, dict[str, Any]] = {}
305
- self._tasks: set[asyncio.Task] = set()
306
- self._session_tasks_by_key: dict[SessionKey, asyncio.Task] = {}
307
- self._deferred_stop_keys: set[SessionKey] = set()
308
- self._deferred_stop_reasons: dict[SessionKey, str] = {}
309
- self._session_lock = asyncio.Lock()
310
- self._toolkit = RemoteToolkit(
311
- name="transcription",
312
- tools=[
313
- StartTranscriptionTool(transcriber=self),
314
- StopTranscriptionTool(transcriber=self),
315
- ],
316
- )
317
-
318
- @staticmethod
319
- def _make_session_key(breakout_room: str, transcript_path: str) -> SessionKey:
320
- return (breakout_room, transcript_path)
321
-
322
- def _sessions_using_path(self, path: str) -> int:
323
- return sum(
324
- 1 for session in self._sessions.values() if session.transcript_path == path
325
- )
326
-
327
- async def _publish_status_attribute(self) -> None:
328
- if not getattr(self, "room", None):
329
- return
330
- payload: dict[str, dict[str, Any]] = {}
331
- for key in sorted(self._status_entries):
332
- entry = self._status_entries[key]
333
- breakout_room, transcript_path = key
334
- entry_payload = dict(entry)
335
- entry_payload.pop("breakout_room", None)
336
- breakout_bucket = payload.setdefault(breakout_room, {})
337
- breakout_bucket[transcript_path] = entry_payload
338
- try:
339
- await self.room.local_participant.set_attribute(
340
- self._STATUS_ATTRIBUTE_KEY,
341
- payload,
342
- )
343
- except (
344
- Exception
345
- ) as exc: # pragma: no cover - attribute updates should not break flow
346
- logger.warning(
347
- "failed to update transcriber status attribute: %s", exc, exc_info=exc
348
- )
349
-
350
- async def _update_status_entry(
351
- self,
352
- session_key: SessionKey,
353
- *,
354
- state: str,
355
- breakout_room: Optional[str],
356
- transcript_path: str,
357
- reason: Optional[str] = None,
358
- error: Optional[str] = None,
359
- ) -> None:
360
- entry = dict(self._status_entries.get(session_key, {}))
361
- now = datetime.utcnow().isoformat() + "Z"
362
- if "created_at" not in entry:
363
- entry["created_at"] = now
364
- entry.update(
365
- {
366
- "session_id": f"{session_key[0]}::{session_key[1]}",
367
- "state": state,
368
- "active": state == "active",
369
- "breakout_room": breakout_room,
370
- "transcript_path": transcript_path,
371
- "updated_at": now,
372
- }
373
- )
374
- if reason is not None:
375
- entry["reason"] = reason
376
- else:
377
- entry.pop("reason", None)
378
- if error is not None:
379
- entry["error"] = error
380
- else:
381
- entry.pop("error", None)
382
- self._status_entries[session_key] = entry
383
- await self._publish_status_attribute()
384
-
385
- async def _remove_status_entry(self, session_key: SessionKey) -> None:
386
- if session_key in self._status_entries:
387
- self._status_entries.pop(session_key, None)
388
- await self._publish_status_attribute()
389
-
390
- def _spawn_task(
391
- self,
392
- coro: Coroutine[Any, Any, Any],
393
- *,
394
- session_key: Optional[SessionKey] = None,
395
- ) -> None:
396
- task = asyncio.create_task(coro)
397
- self._tasks.add(task)
398
- if session_key is not None:
399
- self._session_tasks_by_key[session_key] = task
400
-
401
- def _on_done(t: asyncio.Task):
402
- self._tasks.discard(t)
403
- if session_key is not None:
404
- self._session_tasks_by_key.pop(session_key, None)
405
- try:
406
- t.result()
407
- except asyncio.CancelledError:
408
- logger.debug("transcriber task cancelled")
409
- except Exception as exc:
410
- logger.error("transcriber task failed: %s", exc, exc_info=exc)
411
-
412
- task.add_done_callback(_on_done)
413
-
414
- @staticmethod
415
- def _default_transcript_document_path() -> str:
416
- env_path = os.getenv("TRANSCRIPT_DOCUMENT_PATH", "").strip()
417
- return env_path or "transcript.transcript"
418
-
419
- async def _release_transcript_session(self, path: str) -> None:
420
- ts = self._transcript_sessions.pop(path, None)
421
- if not ts:
422
- return
423
- doc = ts.document
424
- close_coro = getattr(doc, "aclose", None)
425
- if callable(close_coro):
426
- try:
427
- await close_coro()
428
- return
429
- except Exception as exc:
430
- logger.debug(
431
- "failed to aclose transcript document: %s", exc, exc_info=exc
432
- )
433
- close_fn = getattr(doc, "close", None)
434
- if callable(close_fn):
435
- try:
436
- maybe = close_fn()
437
- if asyncio.iscoroutine(maybe):
438
- await maybe
439
- except Exception as exc:
440
- logger.debug(
441
- "failed to close transcript document: %s", exc, exc_info=exc
442
- )
443
-
444
- async def _ensure_transcript_session(self, path: str) -> TranscriptSession:
445
- session = self._transcript_sessions.get(path)
446
- if session is None:
447
- doc = await self.room.sync.open(path=path, create=True)
448
- session = TranscriptSession(document=doc)
449
- self._transcript_sessions[path] = session
450
- logger.info("transcript document ready at %s", path)
451
- return session
452
-
453
- async def start(self, *, room):
454
- await super().start(room=room)
455
- await self._toolkit.start(room=room)
456
- await room.local_participant.set_attribute("supports_voice", True)
457
- await room.messaging.enable()
458
- await self._publish_status_attribute()
459
-
460
- async def start_transcription(self, *, breakout_room: Optional[str], path: str):
461
- session_key = self._make_session_key(breakout_room, path)
462
- if session_key in self._sessions or session_key in self._pending_sessions:
463
- logger.warning(
464
- "start_transcription for breakout=%s transcript=%s ignored; session already active or pending",
465
- breakout_room,
466
- path,
467
- )
468
- return
469
- logger.info(
470
- "transcription starting (breakout=%s transcript=%s)",
471
- breakout_room,
472
- path,
473
- )
474
- self._spawn_task(
475
- self._start_transcription_session(
476
- session_key=session_key,
477
- breakout_room=breakout_room,
478
- transcript_path=path,
479
- ),
480
- session_key=session_key,
481
- )
482
-
483
- async def stop_transcription(self, *, breakout_room: Optional[str], path: str):
484
- breakout_filter = breakout_room if breakout_room is not None else None
485
- target_keys = self._find_session_keys(breakout_filter, path)
486
- if not target_keys:
487
- logger.warning(
488
- "stop_transcription received but no matching sessions (breakout=%s transcript=%s)",
489
- breakout_room,
490
- path,
491
- )
492
- return
493
- logger.info(
494
- "stop_transcription received (breakout=%s transcript=%s); stopping %d session(s)",
495
- breakout_room,
496
- path,
497
- len(target_keys),
498
- )
499
- for key in target_keys:
500
- if key in self._sessions:
501
- self._spawn_task(
502
- self._stop_transcription_session(
503
- session_key=key, reason="stop_transcription"
504
- )
505
- )
506
- elif key in self._pending_sessions:
507
- self._deferred_stop_keys.add(key)
508
- self._deferred_stop_reasons[key] = "stop_transcription"
509
- entry = self._status_entries.get(key)
510
- if entry:
511
- self._spawn_task(
512
- self._update_status_entry(
513
- key,
514
- state="closing",
515
- breakout_room=entry["breakout_room"],
516
- transcript_path=entry["transcript_path"],
517
- reason="stop_transcription",
518
- )
519
- )
520
- else:
521
- entry = self._status_entries.get(key)
522
- if entry:
523
- self._spawn_task(
524
- self._update_status_entry(
525
- key,
526
- state="closing",
527
- breakout_room=entry["breakout_room"],
528
- transcript_path=entry["transcript_path"],
529
- reason="stop_transcription",
530
- )
531
- )
532
-
533
- def _find_session_keys(
534
- self,
535
- breakout_room: Optional[str],
536
- transcript_path: Optional[str],
537
- ) -> list[SessionKey]:
538
- matches: list[SessionKey] = []
539
- for key, entry in self._status_entries.items():
540
- if breakout_room is not None and key[0] != breakout_room:
541
- continue
542
- if (
543
- transcript_path is not None
544
- and entry.get("transcript_path") != transcript_path
545
- ):
546
- continue
547
- matches.append(key)
548
- return matches
549
-
550
async def _start_transcription_session(
    self,
    *,
    session_key: SessionKey,
    breakout_room: Optional[str],
    transcript_path: str,
) -> None:
    """Join ``breakout_room`` and start transcribing it into ``transcript_path``.

    Does nothing when ``session_key`` is already active or already being
    started. Otherwise the key is marked pending, a ``"connecting"`` status
    is published, a ``VoiceConnection`` is entered, and a ``_MeetingManager``
    is started whose transcripts are appended to the transcript session for
    ``transcript_path``. Remote participants already present in the LiveKit
    room are handed to the manager, the session is recorded in
    ``self._sessions`` and an ``"active"`` status is published.

    If a stop was deferred for this key while it was starting, a stop task
    is spawned as soon as the session is active.

    Args:
        session_key: Key under which the session and its status are tracked.
        breakout_room: Breakout room passed to ``VoiceConnection``.
        transcript_path: Path identifying the transcript session to write to.

    Raises:
        asyncio.CancelledError: Re-raised after teardown; the status entry
            is set to ``"cancelled"``.
        Exception: Any start-up failure is re-raised after teardown; the
            status entry is set to ``"error"`` with the error text.
    """
    # Local import: only ``datetime`` is known to be imported at file level.
    from datetime import timezone

    async with self._session_lock:
        if session_key in self._sessions or session_key in self._pending_sessions:
            return
        self._pending_sessions.add(session_key)

    try:
        await self._update_status_entry(
            session_key,
            state="connecting",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
        )
    except BaseException:
        # Nothing has been opened yet, but the pending marker must not
        # outlive this call or the key could never be started again.
        async with self._session_lock:
            self._pending_sessions.discard(session_key)
        raise

    voice_conn: Optional[VoiceConnection] = None
    manager: Optional[_MeetingManager] = None

    async def _abort_start(close_msg: str, disconnect_msg: str) -> None:
        """Undo a partially completed start (shared by both failure paths)."""
        async with self._session_lock:
            self._pending_sessions.discard(session_key)
            self._sessions.pop(session_key, None)
        # A start that never completed cannot honour a deferred stop; drop
        # the request so it is not applied to a later session with this key.
        self._deferred_stop_keys.discard(session_key)
        self._deferred_stop_reasons.pop(session_key, None)
        if manager:
            try:
                await manager.aclose()
            except Exception as close_exc:
                logger.error(close_msg, close_exc, exc_info=close_exc)
        if voice_conn:
            try:
                await voice_conn.__aexit__(None, None, None)
            except Exception as disconnect_exc:
                logger.error(disconnect_msg, disconnect_exc, exc_info=disconnect_exc)
        if self._sessions_using_path(transcript_path) == 0:
            await self._release_transcript_session(transcript_path)

    try:
        voice_conn = VoiceConnection(room=self.room, breakout_room=breakout_room)
        logger.info(
            "joining breakout %s (transcript=%s)",
            breakout_room,
            transcript_path,
        )
        await voice_conn.__aenter__()

        livekit_room = voice_conn.livekit_room
        if livekit_room is None:
            raise RuntimeError("VoiceConnection did not return a LiveKit room")

        await self._ensure_transcript_session(transcript_path)

        async def _handle_transcript(
            participant_id: str,
            participant_name: str,
            chat_ctx: llm.ChatContext,
            new_message: llm.ChatMessage,
        ):
            ts = await self._ensure_transcript_session(transcript_path)
            # Same "<naive ISO-8601>Z" text that utcnow() produced, built
            # from an aware UTC clock because utcnow() is deprecated.
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            stored = ts.append_segment(
                participant_name=participant_name,
                text=new_message.text_content,
                participant_id=participant_id,
                time=now_utc.isoformat() + "Z",
            )
            if not stored:
                logger.debug(
                    "duplicate transcript skipped for %s", participant_name
                )

        manager = _MeetingManager(
            room=livekit_room,
            vad=_get_shared_vad(),
            on_transcript=_handle_transcript,
        )
        manager.start()

        # remote_participants may be a mapping or a plain iterable.
        remotes = getattr(livekit_room, "remote_participants", None) or ()
        if isinstance(remotes, dict):
            participant_iter: Iterable[rtc.RemoteParticipant] = remotes.values()
        else:
            participant_iter = remotes

        participants = [p for p in participant_iter if p is not None]

        for participant in participants:
            # One participant failing must not prevent the others from
            # being registered.
            try:
                manager.on_participant_connected(participant)
            except Exception as exc:
                logger.error(
                    "failed starting session for %s: %s",
                    getattr(participant, "identity", "?"),
                    exc,
                    exc_info=exc,
                )

        session_state = _SessionState(
            breakout_room=breakout_room,
            transcript_path=transcript_path,
            voice_conn=voice_conn,
            manager=manager,
        )

        async with self._session_lock:
            self._sessions[session_key] = session_state
            self._pending_sessions.discard(session_key)

        await self._update_status_entry(
            session_key,
            state="active",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
        )

    except asyncio.CancelledError:
        await _abort_start(
            "failed closing cancelled transcriber: %s",
            "failed disconnecting cancelled livekit: %s",
        )
        await self._update_status_entry(
            session_key,
            state="cancelled",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
            reason="cancelled",
        )
        raise
    except Exception as exc:
        await _abort_start(
            "failed closing multi-user transcriber: %s",
            "failed disconnecting livekit: %s",
        )
        await self._update_status_entry(
            session_key,
            state="error",
            breakout_room=breakout_room,
            transcript_path=transcript_path,
            error=str(exc),
        )
        raise
    else:
        logger.info(
            "ready to transcribe breakout=%s transcript=%s (participants=%d)",
            breakout_room,
            transcript_path,
            len(participants),
        )
        # A stop requested while the session was still connecting is
        # applied now that there is something to stop.
        if session_key in self._deferred_stop_keys:
            reason = self._deferred_stop_reasons.pop(session_key, "stop_requested")
            self._deferred_stop_keys.discard(session_key)
            self._spawn_task(
                self._stop_transcription_session(
                    session_key=session_key, reason=reason
                )
            )
    finally:
        async with self._session_lock:
            self._pending_sessions.discard(session_key)
727
-
728
async def _stop_transcription_session(
    self,
    *,
    session_key: SessionKey,
    reason: str,
    suppress_log: bool = False,
) -> None:
    """Tear down the session stored under ``session_key``.

    When no session is stored for the key, only its status entry is removed.
    Otherwise a ``"closing"`` status is published, the manager and voice
    connection are closed (failures are logged, not raised), the transcript
    session is released if no remaining session uses the same path, and the
    status entry plus any deferred-stop bookkeeping for the key are cleared.

    Args:
        session_key: Key of the session to stop.
        reason: Reason recorded in the ``"closing"`` status and the log line.
        suppress_log: When true, the final "closed" info log is skipped.
    """
    async with self._session_lock:
        active = self._sessions.pop(session_key, None)

    if not active:
        await self._remove_status_entry(session_key)
        return

    room_name = active.breakout_room
    path = active.transcript_path

    await self._update_status_entry(
        session_key,
        state="closing",
        breakout_room=room_name,
        transcript_path=path,
        reason=reason,
    )

    # Close the transcriber first, then leave the LiveKit room; an error in
    # either step is logged so the remaining cleanup still runs.
    transcriber = active.manager
    if transcriber:
        try:
            await transcriber.aclose()
        except Exception as close_err:
            logger.error(
                "failed closing multi-user transcriber: %s",
                close_err,
                exc_info=close_err,
            )

    connection = active.voice_conn
    if connection:
        try:
            await connection.__aexit__(None, None, None)
        except Exception as leave_err:
            logger.error(
                "failed disconnecting livekit: %s", leave_err, exc_info=leave_err
            )

    if self._sessions_using_path(path) == 0:
        await self._release_transcript_session(path)

    await self._remove_status_entry(session_key)
    self._deferred_stop_keys.discard(session_key)
    self._deferred_stop_reasons.pop(session_key, None)

    if suppress_log:
        return

    logger.info(
        "closed livekit connection (breakout=%s transcript=%s reason=%s)",
        room_name,
        path,
        reason,
    )
781
-
782
async def _stop_all_sessions(self, *, reason: str) -> None:
    """Stop every active session and cancel every session still starting.

    Active sessions are stopped one at a time with logging suppressed.
    For each pending session a deferred stop with ``reason`` is recorded,
    a ``"closing"`` status is published when a status entry exists, and the
    start task registered for the key (if any) is cancelled.

    Args:
        reason: Reason recorded for each stop.
    """
    # Snapshot first: stopping a session removes it from self._sessions.
    for active_key in tuple(self._sessions):
        await self._stop_transcription_session(
            session_key=active_key, reason=reason, suppress_log=True
        )

    for pending_key in tuple(self._pending_sessions):
        self._deferred_stop_keys.add(pending_key)
        self._deferred_stop_reasons[pending_key] = reason

        status = self._status_entries.get(pending_key)
        if status:
            await self._update_status_entry(
                pending_key,
                state="closing",
                breakout_room=status["breakout_room"],
                transcript_path=status["transcript_path"],
                reason=reason,
            )

        start_task = self._session_tasks_by_key.get(pending_key)
        if start_task:
            start_task.cancel()
805
-
806
async def stop(self):
    """Shut the agent down.

    Stops the toolkit, stops all sessions with reason ``"agent_stop"``,
    waits for any tracked tasks (their exceptions are collected rather than
    raised), clears the status entries, publishes the status attribute, and
    finally delegates to the parent class's ``stop``.
    """
    await self._toolkit.stop()
    await self._stop_all_sessions(reason="agent_stop")

    if self._tasks:
        # Copy before awaiting so the gather arguments are a fixed snapshot.
        in_flight = list(self._tasks)
        await asyncio.gather(*in_flight, return_exceptions=True)

    self._status_entries.clear()
    await self._publish_status_attribute()

    await super().stop()
814
-
815
-
816
class TranscriptRegistry(SchemaRegistry):
    """Schema registry that publishes the ``transcript`` schema.

    The registry is named ``meshagent.schema.transcript``, is created with
    ``validate_webhook_secret=False``, and holds a single registration that
    maps ``"transcript"`` to ``transcript_schema``.
    """

    def __init__(self):
        schema_name = "transcript"
        registration = SchemaRegistration(name=schema_name, schema=transcript_schema)
        super().__init__(
            name="meshagent.schema." + schema_name,
            validate_webhook_secret=False,
            schemas=[registration],
        )
@@ -1 +0,0 @@
1
- __version__ = "0.6.0"