simli-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.1
2
+ Name: simli-ai
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: aiortc
7
+ Requires-Dist: websockets
8
+ Requires-Dist: requests
9
+ Provides-Extra: local
10
+ Requires-Dist: opencv-python; extra == "local"
11
+ Requires-Dist: pyaudio; extra == "local"
12
+ Provides-Extra: processing
13
+ Requires-Dist: numpy; extra == "processing"
14
+
15
+ To check that the connection works, uncomment the `consumeTrack` call in `registerTrack`.
@@ -0,0 +1 @@
1
+ To check that the connection works, uncomment the `consumeTrack` call in `registerTrack`.
@@ -0,0 +1,26 @@
1
+ [project]
2
+ name = "simli-ai"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ dependencies = [
7
+ "aiortc",
8
+ "websockets",
9
+ "requests",
10
+ ]
11
+
12
+ [project.optional-dependencies]
13
+ local = [
14
+ "opencv-python",
15
+ "pyaudio",
16
+ ]
17
+ processing = [
18
+ "numpy",
19
+ ]
20
+
21
+ [tool.uv]
22
+ dev-dependencies = [
23
+ "python-dotenv",
24
+ "ruff",
25
+ "uv",
26
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ from .simli import SimliClient, SimliConfig
2
+ from . import renderers
3
+ from . import utils
4
+
5
+ __all__ = ["SimliClient", "SimliConfig", "renderers", "utils"]
@@ -0,0 +1,131 @@
1
+ import asyncio
2
+
3
+ import av
4
+ import av.audio
5
+ import av.container
6
+ import av.packet
7
+ import av.video
8
+
9
+ from .simli import SimliClient
10
+
11
+
12
class NPArrayRenderer:
    """Placeholder renderer; it currently defines no attributes or methods."""
14
+
15
+
16
class FileRenderer:
    """
    Dumps the video and audio stream from a :class:`SimliClient` to a file.

    :param client: Client whose video/audio stream iterators are consumed
    :param filename: Path of the output file opened for writing with PyAV
    :param videoCodec: Codec name passed to ``add_stream`` for the video stream
    :param audioCodec: Codec name passed to ``add_stream`` for the audio stream
    """

    def __init__(
        self,
        client: SimliClient,
        filename: str = "output.mp4",
        videoCodec: str = "h264",
        audioCodec: str = "aac",
    ):
        self.client = client
        # Declared for type checkers only; the objects are created in render().
        self.videoStream: av.video.VideoStream
        self.audioStream: av.audio.AudioStream
        self.container: av.container.OutputContainer
        self.filename = filename
        self.videoCodec = videoCodec
        self.audioCodec = audioCodec

    async def render(self):
        """
        Start rendering the video and audio stream to the file.

        Runs the video and audio encoders concurrently and returns once both
        have finished. The container is always closed, even when one of the
        encoders fails or this coroutine is cancelled.
        """
        self.container = av.open(self.filename, "w")
        encodeTasks: list[asyncio.Task] = []
        try:
            self.videoStream = self.container.add_stream(self.videoCodec, rate=30)
            self.videoStream.pix_fmt = "yuv420p"

            self.audioStream = self.container.add_stream(self.audioCodec)
            encodeTasks = [
                asyncio.create_task(self.encodeVideo()),
                asyncio.create_task(self.encodeAudio()),
            ]
            await asyncio.gather(*encodeTasks)
        finally:
            # If one encoder failed, the other must not keep muxing into a
            # container that is about to be closed. cancel() is a no-op for
            # tasks that already finished.
            for task in encodeTasks:
                task.cancel()
            await asyncio.gather(*encodeTasks, return_exceptions=True)
            # Close the file
            self.container.close()

    async def encodeVideo(self):
        """
        Encode every video frame from the client and mux it into the container.

        Stops when the iterator ends or yields ``None``, then flushes the encoder.
        """
        # "yuva420p" makes getVideoStreamIterator hand frames over without
        # reformatting them.
        async for frame in self.client.getVideoStreamIterator("yuva420p"):
            if frame is None:
                break
            self.videoStream.width = frame.width
            self.videoStream.height = frame.height
            for packet in self.videoStream.encode(frame):
                self.container.mux(packet)
        # Calling encode() without a frame flushes packets still buffered.
        for packet in self.videoStream.encode():
            self.container.mux(packet)

    async def encodeAudio(self):
        """
        Encode every audio frame from the client and mux it into the container.

        Stops when the iterator ends or yields ``None``, then flushes the encoder.
        """
        async for frame in self.client.getAudioStreamIterator():
            if frame is None:
                break
            for packet in self.audioStream.encode(frame):
                self.container.mux(packet)
        # Calling encode() without a frame flushes packets still buffered.
        for packet in self.audioStream.encode():
            self.container.mux(packet)
71
+
72
+
73
class LocalRenderer:
    """
    Outputs the video and audio stream to local display and speaker respectively. Can not be used in a headless environment. Uses OpenCV for video and PyAudio for audio.

    :param client: Client whose video/audio stream iterators are consumed
    :param windowName: Title of the OpenCV window the video is shown in
    :raises ImportError: If ``cv2`` or ``pyaudio`` is not installed
    """

    def __init__(self, client: SimliClient, windowName: str = "Simli"):
        try:
            import cv2
            import pyaudio  # type: ignore
        except ImportError as err:
            raise ImportError(
                "cv2 and pyaudio are required for LocalRenderer, Install optional dependencies using \n\"pip install 'simli-ai[local]'\""
            ) from err

        self.client = client
        # The import above is local to __init__, so keep a handle on the module
        # for displayVideo(); referring to a global ``cv2`` there is a NameError.
        self._cv2 = cv2
        self.windowName = windowName
        self.videoOutput = cv2.namedWindow(
            windowName, cv2.WINDOW_NORMAL | cv2.WINDOW_AUTOSIZE
        )
        cv2.resizeWindow(windowName, (512, 512))
        # Kept for backward compatibility; frames are rendered immediately and
        # never accumulate here.
        self.videoBuffer = []

        self.audioFormat = pyaudio.paInt16
        self.audioChannels = 2
        self.audioRate = 48000
        self.pyaudio = pyaudio.PyAudio()
        self.audioOutput = self.pyaudio.open(
            format=self.audioFormat,
            channels=self.audioChannels,
            rate=self.audioRate,
            output=True,
            frames_per_buffer=1024,
        )
        # Kept for backward compatibility; see videoBuffer.
        self.audioBuffer = []

    async def render(self):
        """
        Start displaying the video and playing the audio; returns when both streams end.
        """
        videoTask = asyncio.create_task(self.displayVideo())
        audioTask = asyncio.create_task(self.playAudio())
        await asyncio.gather(videoTask, audioTask)

    async def displayVideo(self):
        """
        Show every RGB frame from the client in the window created in ``__init__``.

        Stops when the iterator ends or yields ``None``. All OpenCV windows are
        destroyed however the loop ends.
        """
        cv2 = self._cv2
        try:
            async for frame in self.client.getVideoStreamIterator("rgb24"):
                if frame is None:
                    break
                image = frame.to_ndarray()
                # Frames are requested as rgb24; imshow expects BGR channel order.
                cv2.imshow(self.windowName, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                # waitKey lets the HighGUI event loop process the redraw.
                cv2.waitKey(1)
        finally:
            cv2.destroyAllWindows()

    async def playAudio(self):
        """
        Write every audio frame from the client to the PyAudio output stream.

        Stops when the iterator ends or yields ``None``.
        """
        async for frame in self.client.getAudioStreamIterator():
            if frame is None:
                break
            # NOTE(review): this write is synchronous and runs on the event loop.
            self.audioOutput.write(frame.to_ndarray().tobytes())
@@ -0,0 +1,319 @@
1
+ import time
2
+ import asyncio
3
+ import json
4
+ from dataclasses import dataclass
5
+
6
+ import requests
7
+ from aiortc import (
8
+ RTCPeerConnection,
9
+ RTCSessionDescription,
10
+ RTCIceServer,
11
+ RTCConfiguration,
12
+ )
13
+ from aiortc.mediastreams import MediaStreamTrack, VideoStreamTrack, AudioStreamTrack
14
+ from av import VideoFrame, AudioFrame
15
+ from av.audio.resampler import AudioResampler
16
+ import websockets.asyncio.client
17
+
18
+
19
@dataclass
class SimliConfig:
    """
    Session settings for the Simli API.

    ``SimliClient.Initialize`` posts the instance's fields as the JSON body of
    the session start request, so the field names are part of the wire format.
    Refer to https://docs.simli.com for the meaning of each option.
    """

    # Required credentials / avatar selection.
    apiKey: str
    faceId: str

    # Optional flags forwarded unchanged to the API.
    syncAudio: bool = True
    handleSilence: bool = True

    # Optional limits forwarded unchanged to the API
    # (presumably in seconds; confirm against the API docs).
    maxSessionLength: int = 600
    maxIdleTime: int = 30
27
+
28
+
29
class VideoFrameReceiver(MediaStreamTrack):
    """
    Wraps a remote video track and forwards its frames.

    :param source: Track whose ``recv()`` is awaited for every frame
    """

    kind = "video"

    def __init__(self, source: VideoStreamTrack):
        # Initialise the MediaStreamTrack base state, as AudioFrameReceiver does.
        super().__init__()
        self.source = source

    async def recv(self) -> VideoFrame:
        """
        Return the next frame from the wrapped track.

        Any exception raised by the source is printed and ``None`` is returned
        instead, which callers treat as the end of the stream.
        """
        try:
            frame: VideoFrame = await self.source.recv()
            return frame
        except Exception as e:
            print(e)
            return None
42
+
43
+
44
class AudioFrameReceiver(MediaStreamTrack):
    """
    Wraps a remote audio track and forwards its frames.

    :param source: Track whose ``recv()`` is awaited for every frame
    """

    kind = "audio"

    def __init__(self, source: AudioStreamTrack):
        super().__init__()
        self.source = source

    async def recv(self) -> AudioFrame:
        """
        Return the next frame from the wrapped track, or ``None`` when the
        source raises (the exception is printed, not propagated).
        """
        try:
            return await self.source.recv()
        except Exception as error:
            print(error)
        return None
58
+
59
+
60
class SimliClient:
    """
    SimliClient is the main class for interacting with the Simli API. It is used to establish a connection with the Simli servers and receive audio and video data from the servers.
    For more information on the Simli API, visit https://docs.simli.com/

    It can be used as an async context manager: entering calls
    :meth:`Initialize` and exiting calls :meth:`stop`.
    """

    def __init__(self, config: SimliConfig, userTrunServer: bool = False):
        """
        :param config: SimliConfig object containing the API Key and Face ID and other optional parameters for the Simli API refer to https://docs.simli.com for more information
        :param userTrunServer: Whether to use the TURN server provided by the Simli API, if set to False, the default STUN server will be used, use only if you are having issues with the default STUN server. The value is stored as ``self.useTrunServer``.
        """
        self.config = config
        self.pc: RTCPeerConnection | None = None
        self.iceConfig: list[RTCIceServer] | None = None
        self.ready = False
        self.run = True
        self.receiverTask: asyncio.Task | None = None
        self.pingTask: asyncio.Task | None = None
        self.stopping = False
        # Honour the caller's choice; it used to be hard-coded to False.
        self.useTrunServer: bool = userTrunServer

    async def Initialize(
        self,
        latencyInterval: int = 60,
    ):
        """
        Start Simli Connection

        :param latencyInterval: Interval between pings to measure the latency between the client and the simli servers in seconds, set to 0 to disable
        :raises requests.HTTPError: If a session or ICE server request fails
        """
        configJson = vars(self.config)

        # requests is synchronous; run it in a worker thread so the event loop
        # is not blocked while the HTTP round trip is in flight.
        response = await asyncio.to_thread(
            requests.post,
            "https://api.simli.ai/startAudioToVideoSession",
            json=configJson,
        )
        response.raise_for_status()
        self.session_token = response.json()["session_token"]

        if self.useTrunServer:
            iceResponse = await asyncio.to_thread(
                requests.post,
                "https://api.simli.ai/getIceServers",
                json={"apiKey": self.config.apiKey},
            )
            iceResponse.raise_for_status()
            self.iceJSON = iceResponse.json()
            self.iceConfig = [RTCIceServer(**server) for server in self.iceJSON]
        else:
            self.iceConfig = [
                RTCIceServer(
                    urls=[
                        "stun:stun.l.google.com:19302",
                    ]
                )
            ]

        self.pc = RTCPeerConnection(RTCConfiguration(iceServers=self.iceConfig))
        self.pc.addTransceiver("audio", direction="recvonly")
        self.pc.addTransceiver("video", direction="recvonly")
        self.pc.on("track", self.registerTrack)
        self.dc = self.pc.createDataChannel("datachannel", ordered=True)

        await self.pc.setLocalDescription(await self.pc.createOffer())
        # The offer is only sent once ICE gathering has completed.
        while self.pc.iceGatheringState != "complete":
            await asyncio.sleep(0.001)

        jsonOffer = vars(self.pc.localDescription)
        self.wsConnection: websockets.asyncio.client.ClientConnection = (
            websockets.asyncio.client.connect("wss://api.simli.ai/StartWebRTCSession")
        )
        self.wsConnection = await self.wsConnection.__aenter__()

        # Handshake: offer -> ACK, ANSWER; session token -> ACK, START.
        await self.wsConnection.send(json.dumps(jsonOffer))
        await self.wsConnection.recv()  # ACK
        answer = await self.wsConnection.recv()  # ANSWER
        await self.wsConnection.send(self.session_token)
        await self.wsConnection.recv()  # ACK
        ready = await self.wsConnection.recv()  # START MESSAGE
        # NOTE(review): any reply other than "START" leaves self.ready False,
        # so send() keeps raising and handleMessages() only idles.
        if ready == "START":
            self.ready = True
        await self.pc.setRemoteDescription(RTCSessionDescription(**json.loads(answer)))
        self.receiverTask = asyncio.create_task(self.handleMessages())
        if latencyInterval > 0:
            self.pingTask = asyncio.create_task(self.ping(latencyInterval))

    def registerTrack(self, track: MediaStreamTrack):
        """
        Internal: "track" event handler of the peer connection. Wraps the
        incoming track and stores it as ``audioReceiver`` or ``videoReceiver``.
        """
        print("Registering track", track.kind)
        if track.kind == "audio":
            self.audioReceiver = AudioFrameReceiver(track)
        elif track.kind == "video":
            self.videoReceiver = VideoFrameReceiver(track)

    async def handleMessages(self):
        """
        Internal: Handles messages from the websocket connection. Called in the Initialize function
        """
        while self.run:
            if not self.ready:
                await asyncio.sleep(0.001)
                continue
            message = await self.wsConnection.recv()
            if message == "STOP":
                self.run = False
                print("Terminating session due to STOP message")
                await self.stop()
                break

            elif "error" in message:
                print("Error:", message)
                await self.stop()
                break

            elif "pong" in message:
                # The pong echoes the timestamp sent by ping().
                pingTime = float(message.split(" ")[1])
                print(f"Ping: {time.time() - pingTime}")

            elif message != "ACK":
                print(message)

    async def ping(self, interval: int):
        """
        Internal: Pings the simli servers to measure the latency between the client and the simli servers. Called in the Initialize function

        :param interval: Seconds to sleep between two pings
        """
        while self.run:
            pingTime = time.time()
            await self.send(f"ping {pingTime}")
            await asyncio.sleep(interval)

    async def _drainFrames(self, receiverName: str, getFrame):
        """
        Internal: Reads frames through ``getFrame`` until none arrives within
        30 ms or a falsy frame is returned. Does nothing when the receiver
        named ``receiverName`` was never registered.
        """
        if getattr(self, receiverName, None) is None:
            return
        try:
            while await asyncio.wait_for(getFrame(), timeout=0.03):
                continue
        except asyncio.TimeoutError:
            pass

    async def stop(self):
        """
        Gracefully terminates the connection

        Safe to call more than once; only the first call does any work.
        """
        import traceback

        if self.stopping:
            return
        self.stopping = True
        # Lets the handleMessages/ping loops and consumeTrack wind down.
        self.run = False

        try:
            await self.wsConnection.send("DONE")
        except Exception:
            # Best effort: the socket may already be closed or never opened.
            pass

        # Consume frames still in flight before tearing the connection down.
        await self._drainFrames("audioReceiver", self.getNextAudioFrame)
        await self._drainFrames("videoReceiver", self.getNextVideoFrame)

        print("Stopping Simli Connection")
        try:
            await self.wsConnection.__aexit__(None, None, None)
        except Exception:
            traceback.print_exc()

        # stop() can be reached from handleMessages(), i.e. from inside
        # receiverTask. Cancelling the running task would abort the
        # pc.close() below, so the current task is never cancelled.
        currentTask = asyncio.current_task()
        for task in (self.receiverTask, self.pingTask):
            if task is not None and task is not currentTask:
                task.cancel()

        if self.pc is not None:
            try:
                await self.pc.close()
            except Exception:
                traceback.print_exc()

    async def send(self, data: str | bytes):
        """
        Sends Audio data or control messages to the simli servers

        The payload is sent over the websocket in chunks of at most 6000 items.

        :raises Exception: If the connection is not ready yet
        """
        if not self.ready:
            raise Exception("WSDC Not ready, please wait until self.ready is True")
        for i in range(0, len(data), 6000):
            await self.wsConnection.send(data[i : i + 6000])

    async def sendSilence(self, duration: float = 0.1875):
        """
        Sends silence to the simli servers for the specified duration in seconds
        Can be used without args to bootstrap the connection to start receiving silent audio and video frames
        """
        # Two zero bytes per sample, 16000 samples per second.
        await self.send((0).to_bytes(2, "little") * int(16000 * duration))

    async def clearBuffer(self):
        """
        Clears the buffered audio on the simli servers, useful for interrupting the current audio spoken by the avatar
        """
        await self.send("SKIP")

    async def getVideoStreamIterator(self, targetFormat: str = "rgb24"):
        """
        Returns the video output as an async iterator with the specified format (default: rgb24)

        Waits without a time limit for the first frame. Afterwards the
        iterator ends when no frame arrives within 1/15 s. When the receiver
        reports the end of the stream, ``None`` is yielded once and the
        iterator ends.

        Refer to https://pyav.org for more information on the available formats
        """
        first = True
        while True:
            try:
                if first:
                    frame = await self.videoReceiver.recv()
                    # Without clearing the flag the timeout below is never used.
                    first = False
                else:
                    frame = await asyncio.wait_for(
                        self.videoReceiver.recv(), timeout=1 / 15
                    )
            except asyncio.TimeoutError:
                return
            if frame is None:
                # The receiver returns None when the track failed or ended.
                yield None
                return
            # "yuva420p" is passed through without reformatting.
            if targetFormat != "yuva420p":
                frame = frame.reformat(format=targetFormat)
            yield frame

    async def getAudioStreamIterator(self, targetSampleRate: int = 48000):
        """
        Returns the audio output as an async iterator

        Frames are resampled to ``targetSampleRate`` (s16, stereo) unless it is
        48000. Waits without a time limit for the first frame. Afterwards the
        iterator ends when no frame arrives within 40 ms. When the receiver
        reports the end of the stream, ``None`` is yielded once and the
        iterator ends.
        """
        resampler = None
        if targetSampleRate != 48000:  # default WebRTC sample rate
            resampler = AudioResampler(
                format="s16", layout="stereo", rate=targetSampleRate
            )
        first = True
        while True:
            try:
                if first:
                    frame = await self.audioReceiver.recv()
                    # Without clearing the flag the timeout below is never used.
                    first = False
                else:
                    frame = await asyncio.wait_for(
                        self.audioReceiver.recv(), timeout=0.04
                    )
            except asyncio.TimeoutError:
                return
            if frame is None:
                # The receiver returns None when the track failed or ended.
                yield None
                return
            if resampler:
                # resample() returns a list; yield every frame it holds.
                for resampled in resampler.resample(frame):
                    yield resampled
            else:
                yield frame

    async def getNextVideoFrame(self):
        """
        Returns the next video frame exactly as delivered by the video receiver
        """
        return await self.videoReceiver.recv()

    async def getNextAudioFrame(self):
        """
        Returns the next audio frame
        """
        return await self.audioReceiver.recv()

    async def __aenter__(self):
        await self.Initialize()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.stop()
310
+
311
async def consumeTrack(
    track: MediaStreamTrack,
    connection: SimliClient,
):
    """
    Debugging helper: keeps receiving from ``track`` and prints whatever it
    returns for as long as ``connection.run`` is true. Nothing is stored.
    """
    while connection.run:
        received = await track.recv()
        print(received)
File without changes
@@ -0,0 +1,15 @@
1
+ Metadata-Version: 2.1
2
+ Name: simli-ai
3
+ Version: 0.1.0
4
+ Summary: Add your description here
5
+ Description-Content-Type: text/markdown
6
+ Requires-Dist: aiortc
7
+ Requires-Dist: websockets
8
+ Requires-Dist: requests
9
+ Provides-Extra: local
10
+ Requires-Dist: opencv-python; extra == "local"
11
+ Requires-Dist: pyaudio; extra == "local"
12
+ Provides-Extra: processing
13
+ Requires-Dist: numpy; extra == "processing"
14
+
15
+ To check that the connection works, uncomment the `consumeTrack` call in `registerTrack`.
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ simli/__init__.py
4
+ simli/renderers.py
5
+ simli/simli.py
6
+ simli/utils.py
7
+ simli_ai.egg-info/PKG-INFO
8
+ simli_ai.egg-info/SOURCES.txt
9
+ simli_ai.egg-info/dependency_links.txt
10
+ simli_ai.egg-info/requires.txt
11
+ simli_ai.egg-info/top_level.txt
@@ -0,0 +1,10 @@
1
+ aiortc
2
+ websockets
3
+ requests
4
+
5
+ [local]
6
+ opencv-python
7
+ pyaudio
8
+
9
+ [processing]
10
+ numpy
@@ -0,0 +1 @@
1
+ simli