PyPI - simli-ai - Versions diffs - 0.1.0__tar.gz - Mend

simli-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

simli_ai-0.1.0/PKG-INFO +15 -0
simli_ai-0.1.0/README.md +1 -0
simli_ai-0.1.0/pyproject.toml +26 -0
simli_ai-0.1.0/setup.cfg +4 -0
simli_ai-0.1.0/simli/__init__.py +5 -0
simli_ai-0.1.0/simli/renderers.py +131 -0
simli_ai-0.1.0/simli/simli.py +319 -0
simli_ai-0.1.0/simli/utils.py +0 -0
simli_ai-0.1.0/simli_ai.egg-info/PKG-INFO +15 -0
simli_ai-0.1.0/simli_ai.egg-info/SOURCES.txt +11 -0
simli_ai-0.1.0/simli_ai.egg-info/dependency_links.txt +1 -0
simli_ai-0.1.0/simli_ai.egg-info/requires.txt +10 -0
simli_ai-0.1.0/simli_ai.egg-info/top_level.txt +1 -0

simli_ai-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,15 @@
+Metadata-Version: 2.1
+Name: simli-ai
+Version: 0.1.0
+Summary: Add your description here
+Description-Content-Type: text/markdown
+Requires-Dist: aiortc
+Requires-Dist: websockets
+Requires-Dist: requests
+Provides-Extra: local
+Requires-Dist: opencv-python; extra == "local"
+Requires-Dist: pyaudio; extra == "local"
+Provides-Extra: processing
+Requires-Dist: numpy; extra == "processing"
+to check if connection works just uncomment the consumeTrack call in registerTrack

simli_ai-0.1.0/README.md ADDED Viewed

	@@ -0,0 +1 @@
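1	+ To check that the connection works, have `registerTrack` consume each incoming track with `consumeTrack`, e.g. `asyncio.create_task(consumeTrack(track, self))`. Note that `registerTrack` in this release contains no commented-out `consumeTrack` call to uncomment, so the call has to be added by hand.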

simli_ai-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,26 @@
+[project]
+name = "simli-ai"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+dependencies = [
+    "aiortc",
+    "websockets",
+    "requests",
+]
+[project.optional-dependencies]
+local = [
+    "opencv-python",
+    "pyaudio",
+]
+processing = [
+    "numpy",
+]
+[tool.uv]
+dev-dependencies = [
+    "python-dotenv",
+    "ruff",
+    "uv",
+]

simli_ai-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

simli_ai-0.1.0/simli/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .simli import SimliClient, SimliConfig
+from . import renderers
+from . import utils
+__all__ = ["SimliClient", "SimliConfig", "renderers", "utils"]

simli_ai-0.1.0/simli/renderers.py ADDED Viewed

@@ -0,0 +1,131 @@
+import asyncio
+import av
+import av.audio
+import av.container
+import av.packet
+import av.video
+from .simli import SimliClient
+# Placeholder renderer: the class is declared here but has no attributes or methods in this release.
+# NOTE(review): the name suggests frames delivered as NumPy arrays — confirm the intent before implementing.
+class NPArrayRenderer:
+    pass
+class FileRenderer:
+    """
+    Dumps the video and audio stream from a :class:`SimliClient` to a file.
+    """
+    def __init__(
+        self,
+        client: SimliClient,
+        filename: str = "output.mp4",
+        videoCodec: str = "h264",
+        audioCodec: str = "aac",
+    ):
+        self.client = client
+        self.videoStream: av.video.VideoStream
+        self.audioStream: av.audio.AudioStream
+        self.container: av.container.OutputContainer
+        self.filename = filename
+        self.videoCodec = videoCodec
+        self.audioCodec = audioCodec
+    async def render(self):
+        """
+        Start rendering the video and audio stream to the file.
+        """
+        self.container = av.open(self.filename, "w")
+        self.videoStream = self.container.add_stream(self.videoCodec, rate=30)
+        self.videoStream.pix_fmt = "yuv420p"
+        self.audioStream = self.container.add_stream(self.audioCodec)
+        videoEncodeTask = asyncio.create_task(self.encodeVideo())
+        audioEncodeTask = asyncio.create_task(self.encodeAudio())
+        await asyncio.gather(videoEncodeTask, audioEncodeTask)
+        # Close the file
+        self.container.close()
+    async def encodeVideo(self):
+        async for frame in self.client.getVideoStreamIterator("yuva420p"):
+            if frame is None:
+                break
+            self.videoStream.width = frame.width
+            self.videoStream.height = frame.height
+            for packet in self.videoStream.encode(frame):
+                self.container.mux(packet)
+        for packet in self.videoStream.encode():
+            self.container.mux(packet)
+    async def encodeAudio(self):
+        async for frame in self.client.getAudioStreamIterator():
+            if frame is None:
+                break
+            for packet in self.audioStream.encode(frame):
+                self.container.mux(packet)
+        for packet in self.audioStream.encode():
+            self.container.mux(packet)
+class LocalRenderer:
+    """
+    Outputs the video and audio steram to local display and speaker respectively. Can not be used in a headless environment. Uses OpenCV for video and PyAudio for audio.
+    """
+    def __init__(self, client: SimliClient, windowName: str = "Simli"):
+        try:
+            import cv2
+            import pyaudio  # type: ignore # noqa: F821
+        except ImportError:
+            raise ImportError(
+                "cv2 and pyaudio are required for LocalRenderer, Install optional dependencies using \n\"pip install 'simli[local]'\""
+            )
+        self.client = client
+        self.videoOutput = cv2.namedWindow(
+            windowName, cv2.WINDOW_NORMAL | cv2.WINDOW_AUTOSIZE
+        )
+        cv2.resizeWindow(windowName, (512, 512))
+        self.videoBuffer = []
+        self.audioFormat = pyaudio.paInt16
+        self.audioChannels = 2
+        self.audioRate = 48000
+        self.pyaudio = pyaudio.PyAudio()
+        self.audioOutput = self.pyaudio.open(
+            format=self.audioFormat,
+            channels=self.audioChannels,
+            rate=self.audioRate,
+            output=True,
+            frames_per_buffer=1024,
+        )
+        self.audioBuffer = []
+    async def render(self):
+        """
+        Start displaying the video
+        """
+        videoTask = asyncio.create_task(self.displayVideo())
+        audioTask = asyncio.create_task(self.playAudio())
+        await asyncio.gather(videoTask, audioTask)
+    async def displayVideo(self):
+        async for frame in self.client.getVideoStreamIterator("rgb24"):
+            if frame is None:
+                cv2.destroyAllWindows()  # type: ignore # noqa: F821
+                break
+            self.videoBuffer.append(frame.to_ndarray())
+            cv2.imshow("Simli", cv2.cvtColor(self.videoBuffer[0], cv2.COLOR_RGB2BGR))  # type: ignore # noqa: F821
+            self.videoBuffer.pop(0)
+            cv2.waitKey(1)  # type: ignore # noqa: F821
+    async def playAudio(self):
+        async for frame in self.client.getAudioStreamIterator():
+            if frame is None:
+                break
+            self.audioBuffer.append(frame.to_ndarray())
+            self.audioOutput.write(self.audioBuffer[0].tobytes())
+            self.audioBuffer.pop(0)

simli_ai-0.1.0/simli/simli.py ADDED Viewed

@@ -0,0 +1,319 @@
+import time
+import asyncio
+import json
+from dataclasses import dataclass
+import requests
+from aiortc import (
+    RTCPeerConnection,
+    RTCSessionDescription,
+    RTCIceServer,
+    RTCConfiguration,
+)
+from aiortc.mediastreams import MediaStreamTrack, VideoStreamTrack, AudioStreamTrack
+from av import VideoFrame, AudioFrame
+from av.audio.resampler import AudioResampler
+import websockets.asyncio.client
+@dataclass
+class SimliConfig:
+    """
+    Session settings for :class:`SimliClient`. ``SimliClient.Initialize`` posts all fields of this object as the JSON body of the startAudioToVideoSession request.
+    """
+    # Simli API key; Initialize also puts it in the body of the getIceServers request.
+    apiKey: str
+    # Sent with the session request; presumably selects the avatar face — confirm against https://docs.simli.com
+    faceId: str
+    # The two flags below are forwarded to the API unchanged; their effect is not visible in this package.
+    syncAudio: bool = True
+    handleSilence: bool = True
+    # NOTE(review): both limits are presumably in seconds — confirm against the API documentation.
+    maxSessionLength: int = 600
+    maxIdleTime: int = 30
+class VideoFrameReceiver(MediaStreamTrack):
+    kind = "video"
+    def __init__(self, source: VideoStreamTrack):
+        self.source = source
+    async def recv(self) -> VideoFrame:
+        try:
+            frame: VideoFrame = await self.source.recv()
+            return frame
+        except Exception as e:
+            print(e)
+            return None
+class AudioFrameReceiver(MediaStreamTrack):
+    kind = "audio"
+    def __init__(self, source: AudioStreamTrack):
+        super().__init__()
+        self.source = source
+    async def recv(self) -> AudioFrame:
+        try:
+            frame: AudioFrame = await self.source.recv()
+            return frame
+        except Exception as e:
+            print(e)
+            return None
+class SimliClient:
+    """
+    SimliConnection is the main class for interacting with the Simli API. It is used to establish a connection with the Simli servers and receive audio and video data from the servers.
+    For more information on the Simli API, visit https://docs.simli.com/
+    """
+    def __init__(self, config: SimliConfig, userTrunServer: bool = False):
+        """
+        :param config: SimliConfig object containing the API Key and Face ID and other optional parameters for the Simli API refer to https://docs.simli.com for more information
+        :param useTrunServer: Whether to use the TURN server provided by the Simli API, if set to False, the default STUN server will be used, use only if you are having issues with the default STUN server
+        """
+        self.config = config
+        self.pc: RTCPeerConnection = None
+        self.iceConfig: list[RTCIceServer] = None
+        self.ready = False
+        self.run = True
+        self.receiverTask: asyncio.Task = None
+        self.pingTask: asyncio.Task = None
+        self.stopping = False
+        self.useTrunServer: bool = False
+    async def Initialize(
+        self,
+        latencyInterval: int = 60,
+    ):
+        """
+        Start Simli Connection
+        :param get_latency: Interval between pings to measure the latency between the client and the simli servers in seconds, set to 0 to disable
+        """
+        configJson = self.config.__dict__
+        response = requests.post(
+            "https://api.simli.ai/startAudioToVideoSession", json=configJson
+        )
+        response.raise_for_status()
+        self.session_token = response.json()["session_token"]
+        if self.useTrunServer:
+            self.iceJSON = requests.post(
+                "https://api.simli.ai/getIceServers",
+                json={"apiKey": self.config.apiKey},
+            )
+            self.iceJSON.raise_for_status()
+            self.iceJSON = self.iceJSON.json()
+            self.iceConfig = []
+            for server in self.iceJSON:
+                self.iceConfig.append(RTCIceServer(**server))
+        else:
+            self.iceConfig = [
+                RTCIceServer(
+                    urls=[
+                        "stun:stun.l.google.com:19302",
+                    ]
+                )
+            ]
+        self.pc = RTCPeerConnection(RTCConfiguration(iceServers=self.iceConfig))
+        self.pc.addTransceiver("audio", direction="recvonly")
+        self.pc.addTransceiver("video", direction="recvonly")
+        self.pc.on("track", self.registerTrack)
+        self.dc = self.pc.createDataChannel("datachannel", ordered=True)
+        await self.pc.setLocalDescription(await self.pc.createOffer())
+        while self.pc.iceGatheringState != "complete":
+            await asyncio.sleep(0.001)
+        jsonOffer = self.pc.localDescription.__dict__
+        self.wsConnection: websockets.asyncio.client.ClientConnection = (
+            websockets.asyncio.client.connect("wss://api.simli.ai/StartWebRTCSession")
+        )
+        self.wsConnection = await self.wsConnection.__aenter__()
+        await self.wsConnection.send(json.dumps(jsonOffer))
+        await self.wsConnection.recv()  # ACK
+        answer = await self.wsConnection.recv()  # ANSWER
+        await self.wsConnection.send(self.session_token)
+        await self.wsConnection.recv()  # ACK
+        ready = await self.wsConnection.recv()  # START MESSAGE
+        if ready == "START":
+            self.ready = True
+        await self.pc.setRemoteDescription(RTCSessionDescription(**json.loads(answer)))
+        self.receiverTask = asyncio.create_task(self.handleMessages())
+        if latencyInterval > 0:
+            self.pingTask = asyncio.create_task(self.ping(latencyInterval))
+    def registerTrack(self, track: MediaStreamTrack):
+        print("Registering track", track.kind)
+        if track.kind == "audio":
+            receiver = AudioFrameReceiver(track)
+            self.audioReceiver = receiver
+        elif track.kind == "video":
+            receiver = VideoFrameReceiver(track)
+            self.videoReceiver = receiver
+    async def handleMessages(self):
+        """
+        Internal: Handles messages from the websocket connection. Called in the Initialize function
+        """
+        while self.run:
+            if not self.ready:
+                await asyncio.sleep(0.001)
+                continue
+            message = await self.wsConnection.recv()
+            if message == "STOP":
+                self.run = False
+                print("Terminating session due to STOP message")
+                await self.stop()
+                break
+            elif "error" in message:
+                print("Error:", message)
+                await self.stop()
+                break
+            elif "pong" in message:
+                pingTime = float(message.split(" ")[1])
+                print(f"Ping: {time.time() - pingTime}")
+            elif message != "ACK":
+                print(message)
+    async def ping(self, interval: int):
+        """
+        Internal: Pings the simli servers to measure the latency between the client and the simli servers. Called in the Initialize function
+        """
+        while self.run:
+            pingTime = time.time()
+            await self.send(f"ping {pingTime}")
+            await asyncio.sleep(interval)
+    async def stop(self):
+        """
+        Gracefully terminates the connection
+        """
+        if self.stopping:
+            return
+        self.stopping = True
+        try:
+            await self.wsConnection.send("DONE")
+        except Exception:
+            pass
+        try:
+            while await asyncio.wait_for(self.getNextAudioFrame(), timeout=0.03):
+                continue
+        except asyncio.TimeoutError:
+            pass
+        try:
+            while await asyncio.wait_for(self.getNextVideoFrame(), timeout=0.03):
+                continue
+        except asyncio.TimeoutError:
+            pass
+        try:
+            print("Stopping Simli Connection")
+            await self.wsConnection.__aexit__(None, None, None)
+            self.receiverTask.cancel()
+            if self.pingTask:
+                self.pingTask.cancel()
+            await self.pc.close()
+        except Exception:
+            import traceback
+            traceback.print_exc()
+    async def send(self, data: str | bytes):
+        """
+        Sends Audio data or control messages to the simli servers
+        """
+        if not self.ready:
+            raise Exception("WSDC Not ready, please wait until self.ready is True")
+        for i in range(0, len(data), 6000):
+            await self.wsConnection.send(data[i : i + 6000])
+    async def sendSilence(self, duration: float = 0.1875):
+        """
+        Sends silence to the simli servers for the specified duration in seconds
+        Can be used without args to bootstrap the connection to start receiving silent audio and video frames
+        """
+        await self.send((0).to_bytes(2, "little") * int(16000 * duration))
+    async def clearBuffer(self):
+        """
+        Clears the buffered audio on the simli servers, useful for interrupting the current audio spoken by the avatar
+        """
+        await self.send("SKIP")
+    async def getVideoStreamIterator(self, targetFormat: str = "rgb24"):
+        """
+        Returns the video output as an async iterator with the specified format (default: rgb24)
+        Refer to https://pyav.org for more information on the available formats
+        """
+        first = True
+        while True:
+            try:
+                if first:
+                    frame = await self.videoReceiver.recv()
+                else:
+                    frame = await asyncio.wait_for(
+                        self.videoReceiver.recv(), timeout=1 / 15
+                    )
+            except asyncio.TimeoutError:
+                return
+            if targetFormat != "yuva420p":
+                frame = frame.reformat(format=targetFormat)
+            yield frame
+    async def getAudioStreamIterator(self, targetSampleRate: int = 48000):
+        """
+        Returns the audio output as an async iterator
+        """
+        resampler = None
+        if targetSampleRate != 48000:  # default WebRTC sample rate
+            resampler = AudioResampler(
+                format="s16", layout="stereo", rate=targetSampleRate
+            )
+        first = True
+        while True:
+            try:
+                if first:
+                    frame = await self.audioReceiver.recv()
+                else:
+                    frame = await asyncio.wait_for(
+                        self.audioReceiver.recv(), timeout=0.04
+                    )
+            except asyncio.TimeoutError:
+                return
+            if resampler:
+                frame = resampler.resample(frame)[0]
+            yield frame
+    async def getNextVideoFrame(self):
+        """
+        Returns the next video frame in the specified format (default: rgb24)
+        """
+        return await self.videoReceiver.recv()
+    async def getNextAudioFrame(self):
+        """
+        Returns the next audio frame
+        """
+        return await self.audioReceiver.recv()
+    async def __aenter__(self):
+        await self.Initialize()
+        return self
+    async def __aexit__(self, exc_type, exc, tb):
+        await self.stop()
+async def consumeTrack(
+    track: MediaStreamTrack,
+    connection: SimliClient,
+):
+    """
+    Used for debugging without dumping the output anywhere, just consumes the track and prints the data
+    """
+    while connection.run:
+        print(await track.recv())

simli_ai-0.1.0/simli/utils.py ADDED Viewed

File without changes

simli_ai-0.1.0/simli_ai.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,15 @@
+Metadata-Version: 2.1
+Name: simli-ai
+Version: 0.1.0
+Summary: Add your description here
+Description-Content-Type: text/markdown
+Requires-Dist: aiortc
+Requires-Dist: websockets
+Requires-Dist: requests
+Provides-Extra: local
+Requires-Dist: opencv-python; extra == "local"
+Requires-Dist: pyaudio; extra == "local"
+Provides-Extra: processing
+Requires-Dist: numpy; extra == "processing"
+to check if connection works just uncomment the consumeTrack call in registerTrack

simli_ai-0.1.0/simli_ai.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,11 @@
+README.md
+pyproject.toml
+simli/__init__.py
+simli/renderers.py
+simli/simli.py
+simli/utils.py
+simli_ai.egg-info/PKG-INFO
+simli_ai.egg-info/SOURCES.txt
+simli_ai.egg-info/dependency_links.txt
+simli_ai.egg-info/requires.txt
+simli_ai.egg-info/top_level.txt

simli_ai-0.1.0/simli_ai.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

simli_ai-0.1.0/simli_ai.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,10 @@
+aiortc
+websockets
+requests
+[local]
+opencv-python
+pyaudio
+[processing]
+numpy

simli_ai-0.1.0/simli_ai.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ simli