PyPI - meshagent-livekit - Versions diffs - 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl - Mend

meshagent-livekit 0.0.37 (py3-none-any.whl) → 0.0.38 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of meshagent-livekit might be problematic. Click here for more details.

Files changed (13)

meshagent/livekit/__init__.py +3 -1
meshagent/livekit/agents/transcriber.py +196 -117
meshagent/livekit/agents/voice.py +112 -115
meshagent/livekit/livekit_protocol.py +15 -13
meshagent/livekit/livekit_protocol_test.py +59 -49
meshagent/livekit/version.py +1 -1
meshagent_livekit-0.0.38.dist-info/METADATA +37 -0
meshagent_livekit-0.0.38.dist-info/RECORD +11 -0
meshagent_livekit-0.0.37.dist-info/METADATA +0 -24
meshagent_livekit-0.0.37.dist-info/RECORD +0 -11
{meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/WHEEL +0 -0
{meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/licenses/LICENSE +0 -0
{meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/top_level.txt +0 -0

meshagent/livekit/__init__.py CHANGED Viewed

@@ -1 +1,3 @@
-from .version import __version__
+from .version import __version__
+__all__ = [__version__]

meshagent/livekit/agents/transcriber.py CHANGED Viewed

@@ -1,15 +1,8 @@
 import logging
 import asyncio
-from meshagent.api.schema_document import Element,Text
-from meshagent.api.room_server_client import RoomClient
-from meshagent.api.websocket_protocol import WebSocketClientProtocol
-import asyncio
 import os
-import logging
-import json
 from livekit import api
@@ -17,7 +10,6 @@ from livekit.agents import stt, transcription, utils
 from livekit.plugins import openai, silero
 from livekit import rtc
 from livekit.rtc import TranscriptionSegment
-from livekit.agents import utils
 from livekit.agents import stt as speech_to_text
 from meshagent.api.runtime import RuntimeDocument
@@ -26,7 +18,7 @@ from typing import Optional
 from meshagent.api.schema import MeshSchema
-from meshagent.api.schema import MeshSchema, ElementType, ChildProperty, ValueProperty
+from meshagent.api.schema import ElementType, ChildProperty, ValueProperty
 from meshagent.agents.agent import AgentCallContext
 from meshagent.agents import TaskRunner
@@ -37,151 +29,221 @@ logger = logging.getLogger("transcriber")
 transcription_schema = MeshSchema(
     root_tag_name="transcript",
     elements=[
-        ElementType(tag_name="transcript", description="a transcript", properties=[
-            ChildProperty(name="transcriptions", description="the transcript entries", child_tag_names=[ "speech" ])
-        ]),
-        ElementType(tag_name="speech", description="transcribed speech", properties=[
-            ValueProperty(name="text", description="the transcribed text", type="string"),
-            ValueProperty(name="startTime", description="the time of the start of this speech", type="number"),
-            ValueProperty(name="endTime", description="the time of th end of this speech", type="number"),
-            ValueProperty(name="participantId", description="the identity of the participant", type="string"),
-            ValueProperty(name="participantName", description="the name of the participant", type="string")
-        ])
-    ]
+        ElementType(
+            tag_name="transcript",
+            description="a transcript",
+            properties=[
+                ChildProperty(
+                    name="transcriptions",
+                    description="the transcript entries",
+                    child_tag_names=["speech"],
+                )
+            ],
+        ),
+        ElementType(
+            tag_name="speech",
+            description="transcribed speech",
+            properties=[
+                ValueProperty(
+                    name="text", description="the transcribed text", type="string"
+                ),
+                ValueProperty(
+                    name="startTime",
+                    description="the time of the start of this speech",
+                    type="number",
+                ),
+                ValueProperty(
+                    name="endTime",
+                    description="the time of th end of this speech",
+                    type="number",
+                ),
+                ValueProperty(
+                    name="participantId",
+                    description="the identity of the participant",
+                    type="string",
+                ),
+                ValueProperty(
+                    name="participantName",
+                    description="the name of the participant",
+                    type="string",
+                ),
+            ],
+        ),
+    ],
 )
-class Transcriber(TaskRunner):
-    def __init__(self, *, livekit_url: Optional[str] = None, livekit_api_key: Optional[str] = None, livekit_api_secret: Optional[str] = None, livekit_identity: Optional[str] = None):
+class Transcriber(TaskRunner):
+    def __init__(
+        self,
+        *,
+        livekit_url: Optional[str] = None,
+        livekit_api_key: Optional[str] = None,
+        livekit_api_secret: Optional[str] = None,
+        livekit_identity: Optional[str] = None,
+    ):
         super().__init__(
             name="livekit.transcriber",
             title="transcriber",
             description="connects to a livekit room and transcribes the conversation",
             input_schema={
-                "type" : "object",
-                "additionalProperties" : False,
-                "required" : [ "room_name", "path" ],
-                "properties" : {
-                    "room_name" : {
-                        "type": "string"
-                    },
-                    "path" : {
-                        "type" : "string"
-                    }
-                }
+                "type": "object",
+                "additionalProperties": False,
+                "required": ["room_name", "path"],
+                "properties": {
+                    "room_name": {"type": "string"},
+                    "path": {"type": "string"},
+                },
             },
             output_schema={
-                "type" : "object",
-                "additionalProperties" : False,
-                "required" : [],
-                "properties" : {
-                }
-            }
+                "type": "object",
+                "additionalProperties": False,
+                "required": [],
+                "properties": {},
+            },
         )
         self._livekit_url = livekit_url
         self._livekit_api_key = livekit_api_key
         self._livekit_api_secret = livekit_api_secret
         self._livekit_identity = livekit_identity
-    async def _transcribe_participant(self, doc: RuntimeDocument, room: rtc.Room, participant: rtc.RemoteParticipant,
-            stt_stream: stt.SpeechStream, stt_forwarder: transcription.STTSegmentsForwarder
-        ):
-            logger.info("transcribing participant %s", participant.sid)
-            """Forward the transcription to the client and log the transcript in the console"""
-            async for ev in stt_stream:
-                logger.info("event from participant %s %s", participant.sid, ev)
-                if ev.type == stt.SpeechEventType.FINAL_TRANSCRIPT:
-                    logger.info("transcript: %s", ev.alternatives[0].text)
-                    if len(ev.alternatives) > 0:
-                        alt = ev.alternatives[0]
-                        doc.root.append_child(tag_name="speech", attributes={ "text":  alt.text, "startTime": alt.start_time, "endTime" : alt.end_time, "participantId" : participant.identity, "participantName" : participant.name })
-            logger.info("done forwarding %s", participant.sid)
+    async def _transcribe_participant(
+        self,
+        doc: RuntimeDocument,
+        room: rtc.Room,
+        participant: rtc.RemoteParticipant,
+        stt_stream: stt.SpeechStream,
+        stt_forwarder: transcription.STTSegmentsForwarder,
+    ):
+        logger.info("transcribing participant %s", participant.sid)
+        """Forward the transcription to the client and log the transcript in the console"""
+        async for ev in stt_stream:
+            logger.info("event from participant %s %s", participant.sid, ev)
+            if ev.type == stt.SpeechEventType.FINAL_TRANSCRIPT:
+                logger.info("transcript: %s", ev.alternatives[0].text)
+                if len(ev.alternatives) > 0:
+                    alt = ev.alternatives[0]
+                    doc.root.append_child(
+                        tag_name="speech",
+                        attributes={
+                            "text": alt.text,
+                            "startTime": alt.start_time,
+                            "endTime": alt.end_time,
+                            "participantId": participant.identity,
+                            "participantName": participant.name,
+                        },
+                    )
+        logger.info("done forwarding %s", participant.sid)
     def should_transcribe(self, p: rtc.Participant) -> bool:
         # don't transcribe other agents
         # todo: maybe have a better way to detect
         return ".agent" not in p.identity
     async def _wait_for_disconnect(self, room: rtc.Room):
         disconnected = asyncio.Future()
         def on_disconnected(_):
             disconnected.set_result(True)
         room.on("disconnected", on_disconnected)
         logger.info("waiting for disconnection")
-        await disconnected
+        await disconnected
     async def ask(self, *, context: AgentCallContext, arguments: dict):
         logger.info("Transcriber connecting to %s", arguments)
         output_path = arguments["path"]
         room_name = arguments["room_name"]
-        client = context.room
+        client = context.room
         doc = await client.sync.open(path=output_path)
         try:
+            vad = silero.VAD.load()
+            utils.http_context._new_session_ctx()
-            vad = silero.VAD.load()
-            utils.http_context._new_session_ctx()
             pending_tasks = list()
-            participantNames = dict[str,str]()
+            participantNames = dict[str, str]()
             sst_provider = openai.STT()
-            #sst_provider = fal.WizperSTT()
+            # sst_provider = fal.WizperSTT()
             room_options = rtc.RoomOptions(auto_subscribe=False)
             room = rtc.Room()
-            url = self._livekit_url if self._livekit_url is not None else os.getenv('LIVEKIT_URL')
-            api_key = self._livekit_api_key if self._livekit_api_key is not None else os.getenv('LIVEKIT_API_KEY')
-            api_secret = self._livekit_api_secret if self._livekit_api_secret is not None else os.getenv('LIVEKIT_API_SECRET')
-            identity = self._livekit_identity if self._livekit_identity is not None else os.getenv('AGENT_IDENTITY')
-            token = api.AccessToken(api_key=api_key, api_secret=api_secret) \
-                .with_identity(identity) \
-                .with_name("Agent") \
-                .with_kind("agent") \
-                .with_grants(api.VideoGrants(
-                    can_update_own_metadata=True,
-                    room_join=True,
-                    room=room_name,
-                    agent=True
-                ))
+            url = (
+                self._livekit_url
+                if self._livekit_url is not None
+                else os.getenv("LIVEKIT_URL")
+            )
+            api_key = (
+                self._livekit_api_key
+                if self._livekit_api_key is not None
+                else os.getenv("LIVEKIT_API_KEY")
+            )
+            api_secret = (
+                self._livekit_api_secret
+                if self._livekit_api_secret is not None
+                else os.getenv("LIVEKIT_API_SECRET")
+            )
+            identity = (
+                self._livekit_identity
+                if self._livekit_identity is not None
+                else os.getenv("AGENT_IDENTITY")
+            )
+            token = (
+                api.AccessToken(api_key=api_key, api_secret=api_secret)
+                .with_identity(identity)
+                .with_name("Agent")
+                .with_kind("agent")
+                .with_grants(
+                    api.VideoGrants(
+                        can_update_own_metadata=True,
+                        room_join=True,
+                        room=room_name,
+                        agent=True,
+                    )
+                )
+            )
             jwt = token.to_jwt()
             await room.connect(url=url, token=jwt, options=room_options)
-            logger.info(
-                "connected to room: %s",
-                room_name
-            )
+            logger.info("connected to room: %s", room_name)
             audio_streams = list[rtc.AudioStream]()
-            async def transcribe_track(participant: rtc.RemoteParticipant, track: rtc.Track):
+            async def transcribe_track(
+                participant: rtc.RemoteParticipant, track: rtc.Track
+            ):
                 audio_stream = rtc.AudioStream(track)
                 stt_forwarder = transcription.STTSegmentsForwarder(
                     room=room, participant=participant, track=track
                 )
                 audio_streams.append(audio_stream)
                 stt = sst_provider
-                if not sst_provider.capabilities.streaming:
+                if not sst_provider.capabilities.streaming:
                     stt = speech_to_text.StreamAdapter(
                         stt=stt,
                         vad=vad,
                     )
                 stt_stream = stt.stream()
-                pending_tasks.append(asyncio.create_task(self._transcribe_participant(doc, room, participant, stt_stream, stt_forwarder)))
+                pending_tasks.append(
+                    asyncio.create_task(
+                        self._transcribe_participant(
+                            doc, room, participant, stt_stream, stt_forwarder
+                        )
+                    )
+                )
                 async for ev in audio_stream:
                     stt_stream.push_frame(ev.frame)
@@ -189,49 +251,67 @@ class Transcriber(TaskRunner):
             def subscribe_if_needed(pub: rtc.RemoteTrackPublication):
                 if pub.kind == rtc.TrackKind.KIND_AUDIO:
                     pub.set_subscribed(True)
             for p in room.remote_participants.values():
                 participantNames[p.identity] = p.name
                 if self.should_transcribe(p):
                     for pub in p.track_publications.values():
                         subscribe_if_needed(pub)
             first_parts = dict[str, rtc.Participant]()
-            def on_transcript_event(segments: list[TranscriptionSegment], part: rtc.Participant | None, pub: rtc.TrackPublication | None = None) -> None:
+            def on_transcript_event(
+                segments: list[TranscriptionSegment],
+                part: rtc.Participant | None,
+                pub: rtc.TrackPublication | None = None,
+            ) -> None:
                 nonlocal room
                 logger.info("Got transcription segment %s %s %s", segments, part, pub)
                 for segment in segments:
-                    if segment.id not in first_parts and part != None:
+                    if segment.id not in first_parts and part is not None:
                         first_parts[segment.id] = part
                     if segment.final:
-                        if part == None and segment.id in first_parts:
+                        if part is None and segment.id in first_parts:
                             part = first_parts[segment.id]
                             first_parts.pop(segment.id)
-                        if part != None:
-                            doc.root.append_child(tag_name="speech", attributes={ "text":  segment.text, "startTime": segment.start_time, "endTime" : segment.end_time, "participantId" : part.identity, "participantName" : part.name })
+                        if part is not None:
+                            doc.root.append_child(
+                                tag_name="speech",
+                                attributes={
+                                    "text": segment.text,
+                                    "startTime": segment.start_time,
+                                    "endTime": segment.end_time,
+                                    "participantId": part.identity,
+                                    "participantName": part.name,
+                                },
+                            )
                         else:
-                            logger.warning("transcription was missing participant information")
+                            logger.warning(
+                                "transcription was missing participant information"
+                            )
             def on_participant_connected(p: rtc.RemoteParticipant):
                 participantNames[p.identity] = p.name
-            def on_track_published(pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant):
+            def on_track_published(
+                pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant
+            ):
                 if self.should_transcribe(p):
                     subscribe_if_needed(pub)
             subscriptions = dict()
-            def on_track_unpublished(pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant):
+            def on_track_unpublished(
+                pub: rtc.RemoteTrackPublication, p: rtc.RemoteParticipant
+            ):
                 if pub in subscriptions:
                     logger.info("track unpublished, stopping transcription")
                     # todo: maybe could be more graceful
                     subscriptions[pub].cancel()
                     subscriptions.pop(pub)
             def on_track_subscribed(
                 track: rtc.Track,
                 publication: rtc.TrackPublication,
@@ -239,22 +319,23 @@ class Transcriber(TaskRunner):
             ):
                 if track.kind == rtc.TrackKind.KIND_AUDIO:
                     logger.info("transcribing track %s", track.sid)
-                    track_task = asyncio.create_task(transcribe_track(participant, track))
+                    track_task = asyncio.create_task(
+                        transcribe_track(participant, track)
+                    )
                     def on_transcription_done(t):
                         try:
                             t.result()
                         except Exception as e:
                             logger.error("Transcription failed", exc_info=e)
                     track_task.add_done_callback(on_transcription_done)
                     pending_tasks.append(track_task)
                     subscriptions[publication] = track_task
             for p in room.remote_participants.values():
                 on_participant_connected(p)
             room.on("participant_connected", on_participant_connected)
             room.on("track_published", on_track_published)
@@ -263,16 +344,15 @@ class Transcriber(TaskRunner):
             room.on("transcription_received", on_transcript_event)
             await self._wait_for_disconnect(room)
             logger.info("waited for termination")
             await room.disconnect()
             logger.info("closing audio streams")
             for stream in audio_streams:
                 await stream.aclose()
             logger.info("waiting for pending tasks")
             gather_future = asyncio.gather(*pending_tasks)
@@ -280,7 +360,7 @@ class Transcriber(TaskRunner):
             try:
                 await gather_future
             except Exception as e:
-                if isinstance(e, asyncio.CancelledError) == False:
+                if not isinstance(e, asyncio.CancelledError):
                     logger.warning("Did not shut down cleanly", exc_info=e)
                 pass
@@ -288,11 +368,10 @@ class Transcriber(TaskRunner):
         except Exception as e:
             logger.info("Transcription failed", exc_info=e)
         finally:
             await utils.http_context._close_http_ctx()
             logger.info("Transcription done")
             await asyncio.sleep(5)
             await client.sync.close(path=output_path)
             return {}

meshagent/livekit/agents/voice.py CHANGED Viewed

@@ -2,7 +2,15 @@ import logging
 import asyncio
 from asyncio import CancelledError
-from meshagent.api import RoomMessage, ErrorResponse, JsonResponse, FileResponse, Requirement, Participant, JsonResponse, EmptyResponse, TextResponse
+from meshagent.api import (
+    RoomMessage,
+    ErrorResponse,
+    Requirement,
+    Participant,
+    JsonResponse,
+    EmptyResponse,
+    TextResponse,
+)
 from meshagent.api.room_server_client import RoomClient
 from meshagent.agents import ToolResponseAdapter
@@ -14,34 +22,23 @@ from openai import AsyncOpenAI
 from meshagent.agents import AgentChatContext
 from livekit.agents import BackgroundAudioPlayer, AudioConfig, BuiltinAudioClip
-from typing import Annotated
 from livekit.plugins import openai, silero
-#from livekit.plugins.turn_detector.multilingual import MultilingualModel
-import uuid
-import asyncio
-import logging
+# from livekit.plugins.turn_detector.multilingual import MultilingualModel
-import os
-import json
+import json
 from typing import Any
-from livekit.plugins import openai
-from livekit.plugins import openai, silero
 from livekit import rtc
-from livekit.agents import Agent, AgentSession, RunContext
+from livekit.agents import RunContext
 from typing import Optional
-from copy import deepcopy
-from meshagent.api.schema_util import merge, prompt_schema
 from meshagent.agents import SingleRoomAgent
-from livekit.plugins.turn_detector.multilingual import MultilingualModel
 import re
@@ -52,7 +49,7 @@ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> st
     """
     Replaces every character in `text` that does not match the given
     `allowed_chars` regex set with `replacement`.
     Parameters:
     -----------
     text : str
@@ -62,7 +59,7 @@ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> st
         For example, "a-zA-Z0-9" will keep only letters and digits.
     replacement : str
         The string to replace non-matching characters with.
     Returns:
     --------
     str
@@ -72,35 +69,40 @@ def _replace_non_matching(text: str, allowed_chars: str, replacement: str) -> st
     pattern = rf"[^{allowed_chars}]"
     return re.sub(pattern, replacement, text)
 def safe_tool_name(name: str):
     return _replace_non_matching(name, "a-zA-Z0-9_-", "_")
 class VoiceConnection:
     def __init__(self, *, room: RoomClient, breakout_room: str):
         self.room = room
         self.breakout_room = breakout_room
-    async def __aenter__(self):
+    async def __aenter__(self):
         client = self.room
         room_options = rtc.RoomOptions(auto_subscribe=True)
         room = rtc.Room()
         self.livekit_room = room
-        connection_info = await client.livekit.get_connection_info(breakout_room=self.breakout_room)
-        await room.connect(url=connection_info.url, token=connection_info.token, options=room_options)
+        connection_info = await client.livekit.get_connection_info(
+            breakout_room=self.breakout_room
+        )
+        await room.connect(
+            url=connection_info.url, token=connection_info.token, options=room_options
+        )
         return self
     async def __aexit__(self, exc_type, exc, tb):
         await self.livekit_room.disconnect()
-class VoiceBot(SingleRoomAgent):
+class VoiceBot(SingleRoomAgent):
     def __init__(
         self,
         name: str,
@@ -112,20 +114,20 @@ class VoiceBot(SingleRoomAgent):
         auto_greet_prompt: Optional[str] = None,
         tool_adapter: ToolResponseAdapter = None,
         toolkits: list[Toolkit] = None,
-        requires: list[Requirement] = None
+        requires: list[Requirement] = None,
     ):
-        if toolkits == None:
+        if toolkits is None:
             toolkits = []
         self.toolkits = toolkits
-        if rules == None:
-            rules = [ "You are a helpful assistant communicating through voice." ]
+        if rules is None:
+            rules = ["You are a helpful assistant communicating through voice."]
         self.tool_adapter = tool_adapter
         self.auto_greet_message = auto_greet_message
         self.auto_greet_prompt = auto_greet_prompt
         self.rules = rules
         super().__init__(
@@ -133,7 +135,7 @@ class VoiceBot(SingleRoomAgent):
             description=description,
             title=title,
             labels=labels,
-            requires=requires
+            requires=requires,
         )
     async def start(self, *, room):
@@ -141,66 +143,75 @@ class VoiceBot(SingleRoomAgent):
         await room.local_participant.set_attribute("supports_voice", True)
         await room.messaging.enable()
         room.messaging.on("message", self.on_message)
     def on_message(self, message: RoomMessage):
         if message.type == "voice_call":
             breakout_room = message.message["breakout_room"]
             logger.info(f"joining breakout room {breakout_room}")
             def on_done(task: asyncio.Task):
                 try:
                     task.result()
-                except CancelledError as e:
-                        pass
+                except CancelledError:
+                    pass
                 except Exception as e:
                     logger.error(f"{e}", exc_info=e)
             for participant in self.room.messaging.remote_participants:
                 if participant.id == message.from_participant_id:
-                    task = asyncio.create_task(self.run_voice_agent(participant=participant, breakout_room=breakout_room))
+                    task = asyncio.create_task(
+                        self.run_voice_agent(
+                            participant=participant, breakout_room=breakout_room
+                        )
+                    )
                     task.add_done_callback(on_done)
                     return
             logger.error(f"unable to find participant {message.from_participant_id}")
     async def _wait_for_disconnect(self, room: rtc.Room):
         disconnected = asyncio.Future()
         def on_disconnected(_):
             disconnected.set_result(True)
         room.on("disconnected", on_disconnected)
         logger.info("waiting for disconnection")
         await disconnected
     async def make_function_tools(self, *, context: ToolContext):
-        toolkits = [
-            *await self.get_required_toolkits(context=context),
-            *self.toolkits
-        ]
+        toolkits = [*await self.get_required_toolkits(context=context), *self.toolkits]
         tools = []
         for toolkit in toolkits:
             for tool in toolkit.tools:
-                tools.append(self._make_function_tool(toolkits, context, tool.name, tool.description, tool.input_schema))
+                tools.append(
+                    self._make_function_tool(
+                        toolkits,
+                        context,
+                        tool.name,
+                        tool.description,
+                        tool.input_schema,
+                    )
+                )
         return tools
     def _make_function_tool(
-        self, toolkits: list[Toolkit], context: ToolContext, name: str, description: str | None, input_schema: dict
+        self,
+        toolkits: list[Toolkit],
+        context: ToolContext,
+        name: str,
+        description: str | None,
+        input_schema: dict,
     ) -> RawFunctionTool:
         name = safe_tool_name(name)
         async def _tool_called(raw_arguments: dict) -> Any:
             try:
                 tool = None
                 for toolkit in toolkits:
                     for t in toolkit.tools:
@@ -208,9 +219,7 @@ class VoiceBot(SingleRoomAgent):
                             tool = t
                 if tool is None:
-                    raise ToolError(
-                    f"Could not find tool {name}"
-                    )
+                    raise ToolError(f"Could not find tool {name}")
                 try:
                     logger.info(f"executing tool {name}: {raw_arguments}")
@@ -218,32 +227,31 @@ class VoiceBot(SingleRoomAgent):
                 except Exception as e:
                     logger.error(f"failed to call tool {tool.name}: {e}")
                     return ToolError("f{e}")
-                if self.tool_adapter == None:
+                if self.tool_adapter is None:
                     if isinstance(tool_result, ErrorResponse):
                         raise ToolError(tool_result.text)
                     if isinstance(tool_result, JsonResponse):
                         return json.dumps(tool_result.json)
                     if isinstance(tool_result, TextResponse):
                         return tool_result.text
                     if isinstance(tool_result, EmptyResponse):
                         return "success"
-                    if tool_result == None:
+                    if tool_result is None:
                         return "success"
                     raise ToolError(
                         f"Tool '{name}' returned an unexpected result {type(tool_result)}, attach a tool response adapter"
                     )
                 else:
-                    text = await self.tool_adapter.to_plain_text(room=context.room, response=tool_result)
-                    if text == None:
+                    text = await self.tool_adapter.to_plain_text(
+                        room=context.room, response=tool_result
+                    )
+                    if text is None:
                         text = "success"
                     return text
@@ -251,14 +259,17 @@ class VoiceBot(SingleRoomAgent):
                 logger.error("unable to call tool", exc_info=e)
                 raise
         return function_tool(
             _tool_called,
-            raw_schema={"name": name, "description": description, "strict" : True, "parameters": input_schema},
+            raw_schema={
+                "name": name,
+                "description": description,
+                "strict": True,
+                "parameters": input_schema,
+            },
         )
     async def create_agent(self, *, context: ToolContext, session: AgentSession):
         @function_tool
         async def say(context: RunContext, text: str):
             "says something out loud to the user"
@@ -266,8 +277,8 @@ class VoiceBot(SingleRoomAgent):
             session.say(text)
             return "success"
-        ctx=ChatContext()
+        ctx = ChatContext()
         initial_context = await self.init_chat_context()
         for message in initial_context.messages:
             ctx.add_message(role=message["role"], content=message["content"])
@@ -276,10 +287,7 @@ class VoiceBot(SingleRoomAgent):
             chat_ctx=ctx,
             instructions="\n".join(self.rules),
             allow_interruptions=True,
-            tools=[
-                *await self.make_function_tools(context=context),
-                say
-            ]
+            tools=[*await self.make_function_tools(context=context), say],
         )
         # agent = Agent(
@@ -291,88 +299,77 @@ class VoiceBot(SingleRoomAgent):
         #    tts=openai.TTS(),
         #    vad=silero.VAD.load(),
         #   allow_interruptions=True
-        #)
+        # )
     async def init_chat_context(self) -> AgentChatContext:
-        return AgentChatContext()
+        return AgentChatContext()
     def create_session(self, *, context: ToolContext) -> AgentSession:
+        token: str = context.room.protocol.token
+        url: str = context.room.room_url
-        token : str = context.room.protocol.token
-        url : str = context.room.room_url
         room_proxy_url = f"{url}/v1"
         oaiclient = AsyncOpenAI(
             api_key=token,
             base_url=room_proxy_url,
-            default_headers={
-                "Meshagent-Session" : context.room.session_id
-            }
+            default_headers={"Meshagent-Session": context.room.session_id},
         )
         session = AgentSession(
             max_tool_steps=50,
             allow_interruptions=True,
             vad=silero.VAD.load(),
-            stt=openai.STT(
-                client=oaiclient
-            ),
-            tts=openai.TTS(
-                client=oaiclient,
-                voice="echo"
-            ),
-            llm=openai.LLM(
-                client=oaiclient
-            ),
-            #turn_detection=MultilingualModel(),
+            stt=openai.STT(client=oaiclient),
+            tts=openai.TTS(client=oaiclient, voice="echo"),
+            llm=openai.LLM(client=oaiclient),
+            # turn_detection=MultilingualModel(),
         )
         return session
     async def run_voice_agent(self, *, participant: Participant, breakout_room: str):
-        async with VoiceConnection(room=self.room, breakout_room=breakout_room) as connection:
+        async with VoiceConnection(
+            room=self.room, breakout_room=breakout_room
+        ) as connection:
             logger.info("starting voice agent")
             context = ToolContext(
                 room=self.room,
                 caller=self.room.local_participant,
-                on_behalf_of=participant
+                on_behalf_of=participant,
             )
             session = self.create_session(context=context)
             agent = await self.create_agent(context=context, session=session)
             background_audio = BackgroundAudioPlayer(
                 thinking_sound=[
-                    #AudioConfig(
+                    # AudioConfig(
                     #    os.path.dirname(os.path.abspath(__file__)) +"/sfx/thinking.mp3", volume=0.2),
                     AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING, volume=0.3),
                     AudioConfig(BuiltinAudioClip.KEYBOARD_TYPING2, volume=0.4),
                 ],
             )
-            await background_audio.start(room=connection.livekit_room, agent_session=session)
+            await background_audio.start(
+                room=connection.livekit_room, agent_session=session
+            )
             await session.start(agent=agent, room=connection.livekit_room)
-            if self.auto_greet_prompt != None:
+            if self.auto_greet_prompt is not None:
                 session.generate_reply(user_input=self.auto_greet_prompt)
-            if self.auto_greet_message != None:
+            if self.auto_greet_message is not None:
                 session.say(self.auto_greet_message)
             logger.info("started voice agent")
             await self._wait_for_disconnect(room=connection.livekit_room)
 class Voicebot(VoiceBot):
     def __init__(self, **kwargs):
-        logger.warning("Voicebot is deprecated, use VoiceBot instead. This class will be removed in a future release.")
+        logger.warning(
+            "Voicebot is deprecated, use VoiceBot instead. This class will be removed in a future release."
+        )
         super().__init__(**kwargs)

meshagent/livekit/livekit_protocol.py CHANGED Viewed

@@ -14,35 +14,37 @@ class LivekitProtocol(Protocol):
         self.remote = remote
         self.topic = topic
     async def __aenter__(self):
         self.room.on("data_received", self._on_data_packet)
         return await super().__aenter__()
     async def __aexit__(self, exc_type, exc, tb):
         self.room.off("data_received", self._on_data_packet)
-        return await super().__aexit__(exc_type, exc, tb)
-    async def send_packet(self, data:bytes) -> None:
+        return await super().__aexit__(exc_type, exc, tb)
-        logger.info("sending data packet %s  %s to %s", self.topic, self.remote.identity, self.room.remote_participants[self.remote.identity].sid)
+    async def send_packet(self, data: bytes) -> None:
+        logger.info(
+            "sending data packet %s  %s to %s",
+            self.topic,
+            self.remote.identity,
+            self.room.remote_participants[self.remote.identity].sid,
+        )
         await self.local.publish_data(
             payload=data,
-            topic = self.topic,
-            reliable = True,
-            destination_identities = [ self.remote.identity ],
+            topic=self.topic,
+            reliable=True,
+            destination_identities=[self.remote.identity],
         )
     def _on_data_packet(self, evt: rtc.DataPacket):
         if self.remote != evt.participant:
             return
-        logger.info("received data packet %s from %s", evt.topic, evt.participant.identity)
+        logger.info(
+            "received data packet %s from %s", evt.topic, evt.participant.identity
+        )
         if evt.topic == self.topic:
             self.receive_packet(evt.data)

meshagent/livekit/livekit_protocol_test.py CHANGED Viewed

@@ -15,60 +15,76 @@ import asyncio
 logger = logging.getLogger(__name__)
 @pytest.mark.asyncio
 async def test_protocol():
+    url = os.getenv("LIVEKIT_URL")
+    api_key = os.getenv("LIVEKIT_API_KEY")
+    api_secret = os.getenv("LIVEKIT_API_SECRET")
+    token1 = (
+        api.AccessToken(api_key=api_key, api_secret=api_secret)
+        .with_identity("core:user.test.agent-send")
+        .with_name("Agent")
+        .with_kind("agent")
+        .with_grants(
+            api.VideoGrants(
+                can_update_own_metadata=True,
+                room_join=True,
+                room="test-process",
+                agent=True,
+            )
+        )
+    )
-    url = os.getenv('LIVEKIT_URL')
-    api_key = os.getenv('LIVEKIT_API_KEY')
-    api_secret = os.getenv('LIVEKIT_API_SECRET')
-    token1 = api.AccessToken(api_key=api_key, api_secret=api_secret) \
-        .with_identity('core:user.test.agent-send') \
-        .with_name("Agent") \
-        .with_kind("agent") \
-        .with_grants(api.VideoGrants(
-            can_update_own_metadata=True,
-            room_join=True,
-            room="test-process",
-            agent=True
-        ))
     jwt1 = token1.to_jwt()
-    token2 = api.AccessToken(api_key=api_key, api_secret=api_secret) \
-        .with_identity('core:user.test.agent-recv') \
-        .with_name("Agent") \
-        .with_kind("agent") \
-        .with_grants(api.VideoGrants(
-            can_update_own_metadata=True,
-            room_join=True,
-            room="test-process",
-            agent=True
-        ))
-    jwt2 = token2.to_jwt()
-    room1 = rtc.Room()
+    token2 = (
+        api.AccessToken(api_key=api_key, api_secret=api_secret)
+        .with_identity("core:user.test.agent-recv")
+        .with_name("Agent")
+        .with_kind("agent")
+        .with_grants(
+            api.VideoGrants(
+                can_update_own_metadata=True,
+                room_join=True,
+                room="test-process",
+                agent=True,
+            )
+        )
+    )
+    jwt2 = token2.to_jwt()
+    room1 = rtc.Room()
     await room1.connect(url=url, token=jwt1)
-    room2 = rtc.Room()
+    room2 = rtc.Room()
     await room2.connect(url=url, token=jwt2)
     topic = "test_topic"
     while True:
         await asyncio.sleep(0.1)
-        if room2.local_participant.identity in room1.remote_participants and room1.local_participant.identity in room2.remote_participants:
+        if (
+            room2.local_participant.identity in room1.remote_participants
+            and room1.local_participant.identity in room2.remote_participants
+        ):
             break
-    async with livekit_protocol.LivekitProtocol(room = room1, remote = room1.remote_participants[room2.local_participant.identity], topic=topic) as proto1:
-        async  with livekit_protocol.LivekitProtocol(room = room2, remote = room2.remote_participants[room1.local_participant.identity], topic=topic) as proto2:
+    async with livekit_protocol.LivekitProtocol(
+        room=room1,
+        remote=room1.remote_participants[room2.local_participant.identity],
+        topic=topic,
+    ) as proto1:
+        async with livekit_protocol.LivekitProtocol(
+            room=room2,
+            remote=room2.remote_participants[room1.local_participant.identity],
+            topic=topic,
+        ) as proto2:
             test_data_builder = bytearray()
-            for i in range(1024*1024):
+            for i in range(1024 * 1024):
                 test_data_builder.append(i % 255)
             test_data = bytes(test_data_builder)
@@ -76,16 +92,17 @@ async def test_protocol():
             done = asyncio.Future[bool]()
             matches = 0
-            async def test_fn(protocol, id: int, type:str, data:bytes):
+            async def test_fn(protocol, id: int, type: str, data: bytes):
                 nonlocal matches
                 logger.info("Message received")
                 if test_data != data:
                     raise "data isn't equal"
-                matches+=1
+                matches += 1
                 if matches == 2:
                     done.set_result(True)
             proto2.register_handler("test", test_fn)
             await asyncio.sleep(1)
@@ -95,12 +112,5 @@ async def test_protocol():
             await done
     await room2.disconnect()
     await room1.disconnect()

meshagent/livekit/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.37"
1	+ __version__ = "0.0.38"

meshagent_livekit-0.0.38.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,37 @@
+Metadata-Version: 2.4
+Name: meshagent-livekit
+Version: 0.0.38
+Summary: Livekit support for Meshagent
+License-Expression: Apache-2.0
+Project-URL: Documentation, https://docs.meshagent.com
+Project-URL: Website, https://www.meshagent.com
+Project-URL: Source, https://www.meshagent.com
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pytest~=8.4
+Requires-Dist: pytest-asyncio~=0.26
+Requires-Dist: strip-markdown~=1.3
+Requires-Dist: livekit-api>=1.0
+Requires-Dist: livekit-agents~=1.1
+Requires-Dist: livekit-plugins-openai~=1.1
+Requires-Dist: livekit-plugins-silero~=1.1
+Requires-Dist: livekit-plugins-turn-detector~=1.1
+Requires-Dist: meshagent-api~=0.0.38
+Requires-Dist: meshagent-tools~=0.0.38
+Dynamic: license-file
+## MeshAgent LiveKit
+The ``meshagent.livekit`` package equips agents with real-time audio and voice capabilities via the LiveKit SDK.
+### VoiceBot
+The ``VoiceBot`` agent handles two-way voice conversations allowing users to interact with the agent verbally. Agents based on the ``VoiceBot`` class can be given the same tools as ``ChatBot`` based agents. This means you only need to write a tool once and the same tool can be used across both text and voice based agents. Check out the [Build and Deploy a Voice Agent](https://docs.meshagent.com/agents/standard/buildanddeployvoicebot) example to learn how to create a simple Voice Agent without tools then add built-in MeshAgent tools and custom tools to the agent.
+---
+### Learn more about MeshAgent on our website or check out the docs for additional examples!
+**Website**: [www.meshagent.com](https://www.meshagent.com/)
+**Documentation**: [docs.meshagent.com](https://docs.meshagent.com/)
+---

meshagent_livekit-0.0.38.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+meshagent/livekit/__init__.py,sha256=X78Z4yEg5XfkNKH0HiIdG4k1q5ktB-ampTuXHLNFrAw,58
+meshagent/livekit/livekit_protocol.py,sha256=5Zu4ymLWEGt5SGXLNu94gOeyjnjhaV6uTS2FhSdODqs,1470
+meshagent/livekit/livekit_protocol_test.py,sha256=o7yYxXad4tMazcxFkq44yW-A9tJ0Lk6WdZpG5ifxcU4,2980
+meshagent/livekit/version.py,sha256=R5QxTjVaID7odO0eBWpOnyCjNQxBZ7cpyruM_NMOoDc,23
+meshagent/livekit/agents/transcriber.py,sha256=oqfHBhBSwU62LbsO8WFiJg3Xoi4vkWlTFzgTxBP0erg,13297
+meshagent/livekit/agents/voice.py,sha256=STgjMSqzUgV9UAmleOy1vkgRXP93MDSYgiOO6Lo0peU,11964
+meshagent_livekit-0.0.38.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
+meshagent_livekit-0.0.38.dist-info/METADATA,sha256=pVJlT13Hr3NnFwMSuA60PG_U_uZoPgWcwlepxZl9k_w,1721
+meshagent_livekit-0.0.38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+meshagent_livekit-0.0.38.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
+meshagent_livekit-0.0.38.dist-info/RECORD,,

meshagent_livekit-0.0.37.dist-info/METADATA DELETED Viewed

@@ -1,24 +0,0 @@
-Metadata-Version: 2.4
-Name: meshagent-livekit
-Version: 0.0.37
-Summary: Livekit support for Meshagent
-License-Expression: Apache-2.0
-Project-URL: Documentation, https://docs.meshagent.com
-Project-URL: Website, https://www.meshagent.com
-Project-URL: Source, https://www.meshagent.com
-Requires-Python: >=3.12
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: pytest~=8.3
-Requires-Dist: pytest-asyncio~=0.26
-Requires-Dist: strip-markdown~=1.3
-Requires-Dist: livekit-api>=1.0
-Requires-Dist: livekit-agents~=1.1
-Requires-Dist: livekit-plugins-openai~=1.1
-Requires-Dist: livekit-plugins-silero~=1.1
-Requires-Dist: livekit-plugins-turn-detector~=1.1
-Requires-Dist: meshagent-api~=0.0.37
-Requires-Dist: meshagent-tools~=0.0.37
-Dynamic: license-file
-### Meshagent LiveKit

meshagent_livekit-0.0.37.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-meshagent/livekit/__init__.py,sha256=8zLGg-DfQhnDl2Ky0n-zXpN-8e-g7iR0AcaI4l4Vvpk,32
-meshagent/livekit/livekit_protocol.py,sha256=K9yP-qpxag5_7TXlKjFEx3cOJJJpYI_z6zGzFHoN1Hs,1421
-meshagent/livekit/livekit_protocol_test.py,sha256=n_ZQjt7n4u7TM7eENzH8L0tw8LvypS_JHF_PuJ2o6h4,2836
-meshagent/livekit/version.py,sha256=JaGEpJ5xP3R4j7pGgCziGajlIRjy1_NJdv_OaXPQius,22
-meshagent/livekit/agents/transcriber.py,sha256=Dq1Ijx4gmA-0jQGM-f3w7X-JIZpkRCFDxWae9AOwz-k,12290
-meshagent/livekit/agents/voice.py,sha256=CHbzuLdkxEcuHfC1skjl_0KHV-hmf8i6NWxV7AYTWyc,12348
-meshagent_livekit-0.0.37.dist-info/licenses/LICENSE,sha256=eTt0SPW-sVNdkZe9PS_S8WfCIyLjRXRl7sUBWdlteFg,10254
-meshagent_livekit-0.0.37.dist-info/METADATA,sha256=tdVvbqxsQp0faK5xMyHqFUdLUd2nXywubcydiE2B9kM,790
-meshagent_livekit-0.0.37.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-meshagent_livekit-0.0.37.dist-info/top_level.txt,sha256=GlcXnHtRP6m7zlG3Df04M35OsHtNXy_DY09oFwWrH74,10
-meshagent_livekit-0.0.37.dist-info/RECORD,,

{meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/WHEEL RENAMED Viewed

File without changes

{meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{meshagent_livekit-0.0.37.dist-info → meshagent_livekit-0.0.38.dist-info}/top_level.txt RENAMED Viewed

File without changes

meshagent-livekit 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl

Potentially problematic release.

meshagent-livekit 0.0.37__py3-none-any.whl → 0.0.38__py3-none-any.whl