meshagent-livekit 0.5.2__tar.gz → 0.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of meshagent-livekit might be problematic. Click here for more details.

Files changed (20)
  1. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/CHANGELOG.md +6 -0
  2. {meshagent_livekit-0.5.2/meshagent_livekit.egg-info → meshagent_livekit-0.5.4}/PKG-INFO +3 -3
  3. meshagent_livekit-0.5.4/meshagent/livekit/agents/transcriber.py +187 -0
  4. meshagent_livekit-0.5.4/meshagent/livekit/version.py +1 -0
  5. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4/meshagent_livekit.egg-info}/PKG-INFO +3 -3
  6. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent_livekit.egg-info/requires.txt +2 -2
  7. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/pyproject.toml +2 -2
  8. meshagent_livekit-0.5.2/meshagent/livekit/agents/transcriber.py +0 -377
  9. meshagent_livekit-0.5.2/meshagent/livekit/version.py +0 -1
  10. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/LICENSE +0 -0
  11. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/MANIFEST.in +0 -0
  12. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/README.md +0 -0
  13. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent/livekit/__init__.py +0 -0
  14. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent/livekit/agents/voice.py +0 -0
  15. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent/livekit/livekit_protocol.py +0 -0
  16. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent/livekit/livekit_protocol_test.py +0 -0
  17. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent_livekit.egg-info/SOURCES.txt +0 -0
  18. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent_livekit.egg-info/dependency_links.txt +0 -0
  19. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/meshagent_livekit.egg-info/top_level.txt +0 -0
  20. {meshagent_livekit-0.5.2 → meshagent_livekit-0.5.4}/setup.cfg +0 -0
@@ -1,3 +1,9 @@
1
+ ## [0.5.4]
2
+ - Stability
3
+
4
+ ## [0.5.3]
5
+ - Stability
6
+
1
7
  ## [0.5.2]
2
8
  - Stability
3
9
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshagent-livekit
3
- Version: 0.5.2
3
+ Version: 0.5.4
4
4
  Summary: Livekit support for Meshagent
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Documentation, https://docs.meshagent.com
@@ -17,8 +17,8 @@ Requires-Dist: livekit-agents~=1.1
17
17
  Requires-Dist: livekit-plugins-openai~=1.1
18
18
  Requires-Dist: livekit-plugins-silero~=1.1
19
19
  Requires-Dist: livekit-plugins-turn-detector~=1.1
20
- Requires-Dist: meshagent-api~=0.5.2
21
- Requires-Dist: meshagent-tools~=0.5.2
20
+ Requires-Dist: meshagent-api~=0.5.4
21
+ Requires-Dist: meshagent-tools~=0.5.4
22
22
  Dynamic: license-file
23
23
 
24
24
  # [Meshagent](https://www.meshagent.com)
@@ -0,0 +1,187 @@
1
+ import logging
2
+ import asyncio
3
+ from asyncio import CancelledError
4
+
5
+ from meshagent.api import RoomMessage, Requirement, Participant, RemoteParticipant
6
+ from meshagent.api.room_server_client import RoomClient
7
+
8
+ from livekit.agents import Agent, AgentSession
9
+
10
+ from openai import AsyncOpenAI
11
+
12
+ from livekit.agents.stt import STT
13
+ from livekit.agents import RoomOutputOptions, StopResponse
14
+ from livekit.agents import llm
15
+
16
+ from livekit.plugins import openai, silero
17
+
18
+ from .voice import VoiceConnection
19
+ from livekit import rtc
20
+
21
+ from typing import Optional
22
+
23
+
24
+ from meshagent.agents import SingleRoomAgent
25
+
26
+
27
+ import re
28
+
29
+ logger = logging.getLogger("voice")
30
+
31
+
32
+ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> str:
33
+ """
34
+ Replaces every character in `text` that does not match the given
35
+ `allowed_chars` regex set with `replacement`.
36
+
37
+ Parameters:
38
+ -----------
39
+ text : str
40
+ The input string on which the replacement is to be done.
41
+ allowed_chars : str
42
+ A string defining the set of allowed characters (part of a character set).
43
+ For example, "a-zA-Z0-9" will keep only letters and digits.
44
+ replacement : str
45
+ The string to replace non-matching characters with.
46
+
47
+ Returns:
48
+ --------
49
+ str
50
+ A new string where all characters not in `allowed_chars` are replaced.
51
+ """
52
+ # Build a regex that matches any character NOT in allowed_chars
53
+ pattern = rf"[^{allowed_chars}]"
54
+ return re.sub(pattern, replacement, text)
55
+
56
+
57
+ def safe_tool_name(name: str):
58
+ return _replace_non_matching(name, "a-zA-Z0-9_-", "_")
59
+
60
+
61
+ class _Transcriber(Agent):
62
+ def __init__(self, *, stt: STT, room: RoomClient, participant: RemoteParticipant):
63
+ super().__init__(instructions="not-needed", stt=stt)
64
+ self.room = room
65
+ self.participant = participant
66
+
67
+ async def on_user_turn_completed(
68
+ self, chat_ctx: llm.ChatContext, new_message: llm.ChatMessage
69
+ ):
70
+ logger.info(f"transcription: {new_message.text_content}")
71
+ self.room.messaging.send_message_nowait(
72
+ to=self.participant,
73
+ type="transcript",
74
+ message={"text": new_message.text_content},
75
+ )
76
+
77
+ raise StopResponse()
78
+
79
+
80
+ class Transcriber(SingleRoomAgent):
81
+ def __init__(
82
+ self,
83
+ name: str,
84
+ title: Optional[str] = None,
85
+ description: Optional[str] = None,
86
+ labels: Optional[list[str]] = None,
87
+ requires: list[Requirement] = None,
88
+ ):
89
+ super().__init__(
90
+ name=name,
91
+ description=description,
92
+ title=title,
93
+ labels=labels,
94
+ requires=requires,
95
+ )
96
+
97
+ async def start(self, *, room):
98
+ await super().start(room=room)
99
+ await room.local_participant.set_attribute("supports_voice", True)
100
+ await room.messaging.enable()
101
+ room.messaging.on("message", self.on_message)
102
+
103
+ def on_message(self, message: RoomMessage):
104
+ if message.type == "voice_call":
105
+ breakout_room = message.message["breakout_room"]
106
+
107
+ logger.info(f"joining breakout room {breakout_room}")
108
+
109
+ def on_done(task: asyncio.Task):
110
+ try:
111
+ task.result()
112
+ except CancelledError:
113
+ pass
114
+ except Exception as e:
115
+ logger.error(f"{e}", exc_info=e)
116
+
117
+ for participant in self.room.messaging.remote_participants:
118
+ if participant.id == message.from_participant_id:
119
+ task = asyncio.create_task(
120
+ self.run_voice_agent(
121
+ participant=participant, breakout_room=breakout_room
122
+ )
123
+ )
124
+ task.add_done_callback(on_done)
125
+ return
126
+
127
+ logger.error(f"unable to find participant {message.from_participant_id}")
128
+
129
+ async def _wait_for_disconnect(self, room: rtc.Room):
130
+ disconnected = asyncio.Future()
131
+
132
+ def on_disconnected(_):
133
+ disconnected.set_result(True)
134
+
135
+ room.on("disconnected", on_disconnected)
136
+
137
+ logger.info("waiting for disconnection")
138
+ await disconnected
139
+
140
+ async def create_agent(
141
+ self, *, session: AgentSession, participant: RemoteParticipant
142
+ ):
143
+ return _Transcriber(
144
+ stt=openai.STT(),
145
+ room=self.room,
146
+ participant=participant,
147
+ )
148
+
149
+ def create_session(self) -> AgentSession:
150
+ token: str = self.room.protocol.token
151
+ url: str = self.room.room_url
152
+
153
+ room_proxy_url = f"{url}/v1"
154
+
155
+ oaiclient = AsyncOpenAI(
156
+ api_key=token,
157
+ base_url=room_proxy_url,
158
+ default_headers={"Meshagent-Session": self.room.session_id},
159
+ )
160
+
161
+ session = AgentSession(
162
+ max_tool_steps=50,
163
+ allow_interruptions=False,
164
+ vad=silero.VAD.load(),
165
+ stt=openai.STT(client=oaiclient),
166
+ # turn_detection=MultilingualModel(),
167
+ )
168
+ return session
169
+
170
+ async def run_voice_agent(self, *, participant: Participant, breakout_room: str):
171
+ async with VoiceConnection(
172
+ room=self.room, breakout_room=breakout_room
173
+ ) as connection:
174
+ logger.info("starting transcription agent")
175
+
176
+ session = self.create_session()
177
+
178
+ agent = await self.create_agent(session=session, participant=participant)
179
+
180
+ await session.start(
181
+ agent=agent,
182
+ room=connection.livekit_room,
183
+ room_output_options=RoomOutputOptions(transcription_enabled=True),
184
+ )
185
+
186
+ logger.info("started transcription agent")
187
+ await self._wait_for_disconnect(room=connection.livekit_room)
@@ -0,0 +1 @@
1
+ __version__ = "0.5.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: meshagent-livekit
3
- Version: 0.5.2
3
+ Version: 0.5.4
4
4
  Summary: Livekit support for Meshagent
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Documentation, https://docs.meshagent.com
@@ -17,8 +17,8 @@ Requires-Dist: livekit-agents~=1.1
17
17
  Requires-Dist: livekit-plugins-openai~=1.1
18
18
  Requires-Dist: livekit-plugins-silero~=1.1
19
19
  Requires-Dist: livekit-plugins-turn-detector~=1.1
20
- Requires-Dist: meshagent-api~=0.5.2
21
- Requires-Dist: meshagent-tools~=0.5.2
20
+ Requires-Dist: meshagent-api~=0.5.4
21
+ Requires-Dist: meshagent-tools~=0.5.4
22
22
  Dynamic: license-file
23
23
 
24
24
  # [Meshagent](https://www.meshagent.com)
@@ -6,5 +6,5 @@ livekit-agents~=1.1
6
6
  livekit-plugins-openai~=1.1
7
7
  livekit-plugins-silero~=1.1
8
8
  livekit-plugins-turn-detector~=1.1
9
- meshagent-api~=0.5.2
10
- meshagent-tools~=0.5.2
9
+ meshagent-api~=0.5.4
10
+ meshagent-tools~=0.5.4
@@ -18,8 +18,8 @@ dependencies = [
18
18
  "livekit-plugins-openai~=1.1",
19
19
  "livekit-plugins-silero~=1.1",
20
20
  "livekit-plugins-turn-detector~=1.1",
21
- "meshagent-api~=0.5.2",
22
- "meshagent-tools~=0.5.2"
21
+ "meshagent-api~=0.5.4",
22
+ "meshagent-tools~=0.5.4"
23
23
  ]
24
24
 
25
25
  [project.urls]
@@ -1,377 +0,0 @@
1
- import logging
2
- import asyncio
3
-
4
-
5
- import os
6
-
7
- from livekit import api
8
-
9
- from livekit.agents import stt, transcription, utils
10
- from livekit.plugins import openai, silero
11
- from livekit import rtc
12
- from livekit.rtc import TranscriptionSegment
13
- from livekit.agents import stt as speech_to_text
14
-
15
- from meshagent.api.runtime import RuntimeDocument
16
-
17
- from typing import Optional
18
-
19
- from meshagent.api.schema import MeshSchema
20
-
21
- from meshagent.api.schema import ElementType, ChildProperty, ValueProperty
22
-
23
- from meshagent.agents.agent import AgentCallContext
24
- from meshagent.agents import TaskRunner
25
-
26
- logger = logging.getLogger("transcriber")
27
-
28
-
29
- transcription_schema = MeshSchema(
30
- root_tag_name="transcript",
31
- elements=[
32
- ElementType(
33
- tag_name="transcript",
34
- description="a transcript",
35
- properties=[
36
- ChildProperty(
37
- name="transcriptions",
38
- description="the transcript entries",
39
- child_tag_names=["speech"],
40
- )
41
- ],
42
- ),
43
- ElementType(
44
- tag_name="speech",
45
- description="transcribed speech",
46
- properties=[
47
- ValueProperty(
48
- name="text", description="the transcribed text", type="string"
49
- ),
50
- ValueProperty(
51
- name="startTime",
52
- description="the time of the start of this speech",
53
- type="number",
54
- ),
55
- ValueProperty(
56
- name="endTime",
57
- description="the time of th end of this speech",
58
- type="number",
59
- ),
60
- ValueProperty(
61
- name="participantId",
62
- description="the identity of the participant",
63
- type="string",
64
- ),
65
- ValueProperty(
66
- name="participantName",
67
- description="the name of the participant",
68
- type="string",
69
- ),
70
- ],
71
- ),
72
- ],
73
- )
74
-
75
-
76
- class Transcriber(TaskRunner):
77
- def __init__(
78
- self,
79
- *,
80
- livekit_url: Optional[str] = None,
81
- livekit_api_key: Optional[str] = None,
82
- livekit_api_secret: Optional[str] = None,
83
- livekit_identity: Optional[str] = None,
84
- ):
85
- super().__init__(
86
- name="livekit.transcriber",
87
- title="transcriber",
88
- description="connects to a livekit room and transcribes the conversation",
89
- input_schema={
90
- "type": "object",
91
- "additionalProperties": False,
92
- "required": ["room_name", "path"],
93
- "properties": {
94
- "room_name": {"type": "string"},
95
- "path": {"type": "string"},
96
- },
97
- },
98
- output_schema={
99
- "type": "object",
100
- "additionalProperties": False,
101
- "required": [],
102
- "properties": {},
103
- },
104
- )
105
- self._livekit_url = livekit_url
106
- self._livekit_api_key = livekit_api_key
107
- self._livekit_api_secret = livekit_api_secret
108
- self._livekit_identity = livekit_identity
109
-
110
- async def _transcribe_participant(
111
- self,
112
- doc: RuntimeDocument,
113
- room: rtc.Room,
114
- participant: rtc.RemoteParticipant,
115
- stt_stream: stt.SpeechStream,
116
- stt_forwarder: transcription.STTSegmentsForwarder,
117
- ):
118
- logger.info("transcribing participant %s", participant.sid)
119
- """Forward the transcription to the client and log the transcript in the console"""
120
- async for ev in stt_stream:
121
- logger.info("event from participant %s %s", participant.sid, ev)
122
-
123
- if ev.type == stt.SpeechEventType.FINAL_TRANSCRIPT:
124
- logger.info("transcript: %s", ev.alternatives[0].text)
125
- if len(ev.alternatives) > 0:
126
- alt = ev.alternatives[0]
127
- doc.root.append_child(
128
- tag_name="speech",
129
- attributes={
130
- "text": alt.text,
131
- "startTime": alt.start_time,
132
- "endTime": alt.end_time,
133
- "participantId": participant.identity,
134
- "participantName": participant.name,
135
- },
136
- )
137
-
138
- logger.info("done forwarding %s", participant.sid)
139
-
140
- def should_transcribe(self, p: rtc.Participant) -> bool:
141
- # don't transcribe other agents
142
- # todo: maybe have a better way to detect
143
- return ".agent" not in p.identity
144
-
145
- async def _wait_for_disconnect(self, room: rtc.Room):
146
- disconnected = asyncio.Future()
147
-
148
- def on_disconnected(_):
149
- disconnected.set_result(True)
150
-
151
- room.on("disconnected", on_disconnected)
152
-
153
- logger.info("waiting for disconnection")
154
- await disconnected
155
-
156
- async def ask(self, *, context: AgentCallContext, arguments: dict):
157
- logger.info("Transcriber connecting to %s", arguments)
158
- output_path = arguments["path"]
159
- room_name = arguments["room_name"]
160
-
161
- client = context.room
162
- doc = await client.sync.open(path=output_path)
163
- try:
164
- vad = silero.VAD.load()
165
- utils.http_context._new_session_ctx()
166
-
167
- pending_tasks = list()
168
- participantNames = dict[str, str]()
169
-
170
- sst_provider = openai.STT()
171
- # sst_provider = fal.WizperSTT()
172
-
173
- room_options = rtc.RoomOptions(auto_subscribe=False)
174
-
175
- room = rtc.Room()
176
-
177
- url = (
178
- self._livekit_url
179
- if self._livekit_url is not None
180
- else os.getenv("LIVEKIT_URL")
181
- )
182
- api_key = (
183
- self._livekit_api_key
184
- if self._livekit_api_key is not None
185
- else os.getenv("LIVEKIT_API_KEY")
186
- )
187
- api_secret = (
188
- self._livekit_api_secret
189
- if self._livekit_api_secret is not None
190
- else os.getenv("LIVEKIT_API_SECRET")
191
- )
192
- identity = (
193
- self._livekit_identity
194
- if self._livekit_identity is not None
195
- else os.getenv("AGENT_IDENTITY")
196
- )
197
-
198
- token = (
199
- api.AccessToken(api_key=api_key, api_secret=api_secret)
200
- .with_identity(identity)
201
- .with_name("Agent")
202
- .with_kind("agent")
203
- .with_grants(
204
- api.VideoGrants(
205
- can_update_own_metadata=True,
206
- room_join=True,
207
- room=room_name,
208
- agent=True,
209
- )
210
- )
211
- )
212
-
213
- jwt = token.to_jwt()
214
-
215
- await room.connect(url=url, token=jwt, options=room_options)
216
-
217
- logger.info("connected to room: %s", room_name)
218
-
219
- audio_streams = list[rtc.AudioStream]()
220
-
221
- async def transcribe_track(
222
- participant: rtc.RemoteParticipant, track: rtc.Track
223
- ):
224
- audio_stream = rtc.AudioStream(track)
225
- stt_forwarder = transcription.STTSegmentsForwarder(
226
- room=room, participant=participant, track=track
227
- )
228
-
229
- audio_streams.append(audio_stream)
230
-
231
- stt = sst_provider
232
- if not sst_provider.capabilities.streaming:
233
- stt = speech_to_text.StreamAdapter(
234
- stt=stt,
235
- vad=vad,
236
- )
237
-
238
- stt_stream = stt.stream()
239
-
240
- pending_tasks.append(
241
- asyncio.create_task(
242
- self._transcribe_participant(
243
- doc, room, participant, stt_stream, stt_forwarder
244
- )
245
- )
246
- )
247
-
248
- async for ev in audio_stream:
249
- stt_stream.push_frame(ev.frame)
250
-
251
- def subscribe_if_needed(pub: rtc.RemoteTrackPublication):
252
- if pub.kind == rtc.TrackKind.KIND_AUDIO:
253
- pub.set_subscribed(True)
254
-
255
- for p in room.remote_participants.values():
256
- participantNames[p.identity] = p.name
257
- if self.should_transcribe(p):
258
- for pub in p.track_publications.values():
259
- subscribe_if_needed(pub)
260
-
261
- first_parts = dict[str, rtc.Participant]()
262
-
263
- def on_transcript_event(
264
- segments: list[TranscriptionSegment],
265
- part: rtc.Participant | None,
266
- pub: rtc.TrackPublication | None = None,
267
- ) -> None:
268
- nonlocal room
269
- logger.info("Got transcription segment %s %s %s", segments, part, pub)
270
- for segment in segments:
271
- if segment.id not in first_parts and part is not None:
272
- first_parts[segment.id] = part
273
-
274
- if segment.final:
275
- if part is None and segment.id in first_parts:
276
- part = first_parts[segment.id]
277
- first_parts.pop(segment.id)
278
-
279
- if part is not None:
280
- doc.root.append_child(
281
- tag_name="speech",
282
- attributes={
283
- "text": segment.text,
284
- "startTime": segment.start_time,
285
- "endTime": segment.end_time,
286
- "participantId": part.identity,
287
- "participantName": part.name,
288
- },
289
- )
290
- else:
291
- logger.warning(
292
- "transcription was missing participant information"
293
- )
294
-
295
- def on_participant_connected(p: rtc.RemoteParticipant):
296
- participantNames[p.identity] = p.name
297
-
298
- def on_track_published(
299
- pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant
300
- ):
301
- if self.should_transcribe(p):
302
- subscribe_if_needed(pub)
303
-
304
- subscriptions = dict()
305
-
306
- def on_track_unpublished(
307
- pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant
308
- ):
309
- if pub in subscriptions:
310
- logger.info("track unpublished, stopping transcription")
311
- # todo: maybe could be more graceful
312
- subscriptions[pub].cancel()
313
- subscriptions.pop(pub)
314
-
315
- def on_track_subscribed(
316
- track: rtc.Track,
317
- publication: rtc.TrackPublication,
318
- participant: rtc.RemoteParticipant,
319
- ):
320
- if track.kind == rtc.TrackKind.KIND_AUDIO:
321
- logger.info("transcribing track %s", track.sid)
322
- track_task = asyncio.create_task(
323
- transcribe_track(participant, track)
324
- )
325
-
326
- def on_transcription_done(t):
327
- try:
328
- t.result()
329
- except Exception as e:
330
- logger.error("Transcription failed", exc_info=e)
331
-
332
- track_task.add_done_callback(on_transcription_done)
333
- pending_tasks.append(track_task)
334
- subscriptions[publication] = track_task
335
-
336
- for p in room.remote_participants.values():
337
- on_participant_connected(p)
338
-
339
- room.on("participant_connected", on_participant_connected)
340
-
341
- room.on("track_published", on_track_published)
342
- room.on("track_unpublished", on_track_unpublished)
343
- room.on("track_subscribed", on_track_subscribed)
344
- room.on("transcription_received", on_transcript_event)
345
-
346
- await self._wait_for_disconnect(room)
347
-
348
- logger.info("waited for termination")
349
- await room.disconnect()
350
-
351
- logger.info("closing audio streams")
352
-
353
- for stream in audio_streams:
354
- await stream.aclose()
355
-
356
- logger.info("waiting for pending tasks")
357
- gather_future = asyncio.gather(*pending_tasks)
358
-
359
- gather_future.cancel()
360
- try:
361
- await gather_future
362
- except Exception as e:
363
- if not isinstance(e, asyncio.CancelledError):
364
- logger.warning("Did not shut down cleanly", exc_info=e)
365
- pass
366
-
367
- print("done")
368
- except Exception as e:
369
- logger.info("Transcription failed", exc_info=e)
370
- finally:
371
- await utils.http_context._close_http_ctx()
372
- logger.info("Transcription done")
373
-
374
- await asyncio.sleep(5)
375
- await client.sync.close(path=output_path)
376
-
377
- return {}
@@ -1 +0,0 @@
1
- __version__ = "0.5.2"