livekit-plugins-humanlike 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-humanlike
3
+ Version: 0.1.0
4
+ Summary: Humanlike avatar plugin for LiveKit Agents — real-time talking-head video with expression control
5
+ Author: Humanlike AI
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/HumanlikeAI/livekit-plugins-humanlike
8
+ Project-URL: Repository, https://github.com/HumanlikeAI/livekit-plugins-humanlike
9
+ Keywords: livekit,avatar,talking-head,real-time,humanlike
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Multimedia :: Video
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: livekit-agents>=1.0
21
+ Requires-Dist: numpy
22
+ Requires-Dist: websockets
23
+ Requires-Dist: Pillow
24
+ Provides-Extra: fast
25
+ Requires-Dist: turbojpeg; extra == "fast"
26
+ Requires-Dist: soxr; extra == "fast"
27
+
28
+ # livekit-plugins-humanlike
29
+
30
+ Real-time talking-head avatar plugin for [LiveKit Agents](https://docs.livekit.io/agents/).
31
+
32
+ Streams TTS audio to a GPU-backed avatar orchestrator that returns lip-synced video frames with facial expressions guided by a natural-language prompt.
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install livekit-plugins-humanlike
38
+ ```
39
+
40
+ ## Quick start
41
+
42
+ ```python
43
+ from livekit.plugins.humanlike import AvatarSession
44
+
45
+ avatar = AvatarSession(
46
+ orchestrator_url="ws://your-gpu-server:8000/ws/stream",
47
+ image="/path/to/face.png",
48
+ avatar_model="humanlike-homo",
49
+ prompt="warm, friendly, subtly smiling, occasional nods",
50
+ )
51
+
52
+ # Wire into your LiveKit agent
53
+ await avatar.start(agent_session, room=ctx.room)
54
+ ```
55
+
56
+ ## Parameters
57
+
58
+ | Parameter | Default | Description |
59
+ |-----------|---------|-------------|
60
+ | `orchestrator_url` | `ws://localhost:8000/ws/stream` | Avatar server WebSocket URL |
61
+ | `image` | `./face.png` | Face image (file path or raw bytes) |
62
+ | `avatar_model` | `humanlike-homo` | Model identifier |
63
+ | `prompt` | `""` | Expression prompt (e.g. "warm, smiling, nods") |
64
+ | `seed` | `42` | Random seed for reproducibility |
65
+ | `video_width` | `512` | Output video width |
66
+ | `video_height` | `512` | Output video height |
67
+ | `tts_sample_rate` | `16000` | Must match your TTS output rate |
68
+
69
+ ## Live expression updates
70
+
71
+ Update the avatar's expression mid-conversation:
72
+
73
+ ```python
74
+ await avatar.set_prompt("excited, wide eyes, big smile")
75
+ ```
76
+
77
+ ## Full agent example
78
+
79
+ ```python
80
+ from livekit.agents import Agent, AgentSession, JobContext, RoomOutputOptions, WorkerOptions, cli
81
+ from livekit.plugins import cartesia, openai, silero
82
+ from livekit.plugins.humanlike import AvatarSession
83
+
84
+ async def entrypoint(ctx: JobContext):
85
+ await ctx.connect()
86
+
87
+ session = AgentSession(
88
+ stt=openai.STT(),
89
+ llm=openai.LLM(model="gpt-4o"),
90
+ tts=cartesia.TTS(model="sonic-3", sample_rate=16000),
91
+ vad=silero.VAD.load(),
92
+ )
93
+
94
+ avatar = AvatarSession(
95
+ orchestrator_url="ws://localhost:8000/ws/stream",
96
+ image="./face.png",
97
+ avatar_model="humanlike-homo",
98
+ prompt="warm, friendly, natural eye movement",
99
+ )
100
+ await avatar.start(session, room=ctx.room)
101
+
102
+ await session.start(
103
+ room=ctx.room,
104
+ agent=Agent(instructions="You are a helpful assistant."),
105
+ room_output_options=RoomOutputOptions(audio_enabled=False),
106
+ )
107
+
108
+ if __name__ == "__main__":
109
+ cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
110
+ ```
@@ -0,0 +1,83 @@
1
+ # livekit-plugins-humanlike
2
+
3
+ Real-time talking-head avatar plugin for [LiveKit Agents](https://docs.livekit.io/agents/).
4
+
5
+ Streams TTS audio to a GPU-backed avatar orchestrator that returns lip-synced video frames with facial expressions guided by a natural-language prompt.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ pip install livekit-plugins-humanlike
11
+ ```
12
+
13
+ ## Quick start
14
+
15
+ ```python
16
+ from livekit.plugins.humanlike import AvatarSession
17
+
18
+ avatar = AvatarSession(
19
+ orchestrator_url="ws://your-gpu-server:8000/ws/stream",
20
+ image="/path/to/face.png",
21
+ avatar_model="humanlike-homo",
22
+ prompt="warm, friendly, subtly smiling, occasional nods",
23
+ )
24
+
25
+ # Wire into your LiveKit agent
26
+ await avatar.start(agent_session, room=ctx.room)
27
+ ```
28
+
29
+ ## Parameters
30
+
31
+ | Parameter | Default | Description |
32
+ |-----------|---------|-------------|
33
+ | `orchestrator_url` | `ws://localhost:8000/ws/stream` | Avatar server WebSocket URL |
34
+ | `image` | `./face.png` | Face image (file path or raw bytes) |
35
+ | `avatar_model` | `humanlike-homo` | Model identifier |
36
+ | `prompt` | `""` | Expression prompt (e.g. "warm, smiling, nods") |
37
+ | `seed` | `42` | Random seed for reproducibility |
38
+ | `video_width` | `512` | Output video width |
39
+ | `video_height` | `512` | Output video height |
40
+ | `tts_sample_rate` | `16000` | Must match your TTS output rate |
41
+
42
+ ## Live expression updates
43
+
44
+ Update the avatar's expression mid-conversation:
45
+
46
+ ```python
47
+ await avatar.set_prompt("excited, wide eyes, big smile")
48
+ ```
49
+
50
+ ## Full agent example
51
+
52
+ ```python
53
+ from livekit.agents import Agent, AgentSession, JobContext, RoomOutputOptions, WorkerOptions, cli
54
+ from livekit.plugins import cartesia, openai, silero
55
+ from livekit.plugins.humanlike import AvatarSession
56
+
57
+ async def entrypoint(ctx: JobContext):
58
+ await ctx.connect()
59
+
60
+ session = AgentSession(
61
+ stt=openai.STT(),
62
+ llm=openai.LLM(model="gpt-4o"),
63
+ tts=cartesia.TTS(model="sonic-3", sample_rate=16000),
64
+ vad=silero.VAD.load(),
65
+ )
66
+
67
+ avatar = AvatarSession(
68
+ orchestrator_url="ws://localhost:8000/ws/stream",
69
+ image="./face.png",
70
+ avatar_model="humanlike-homo",
71
+ prompt="warm, friendly, natural eye movement",
72
+ )
73
+ await avatar.start(session, room=ctx.room)
74
+
75
+ await session.start(
76
+ room=ctx.room,
77
+ agent=Agent(instructions="You are a helpful assistant."),
78
+ room_output_options=RoomOutputOptions(audio_enabled=False),
79
+ )
80
+
81
+ if __name__ == "__main__":
82
+ cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
83
+ ```
File without changes
@@ -0,0 +1,22 @@
1
+ """
2
+ LiveKit Agents plugin — Humanlike Avatar.
3
+
4
+ Provides a real-time talking-head avatar with expression-aware generation.
5
+ Streams TTS audio to a GPU-backed avatar orchestrator that returns lip-synced
6
+ video frames with facial expressions guided by a natural-language prompt.
7
+
8
+ Usage:
9
+ from livekit.plugins.humanlike import AvatarSession
10
+
11
+ avatar = AvatarSession(
12
+ orchestrator_url="ws://localhost:8000/ws/stream",
13
+ image="/path/to/face.png", # or raw bytes
14
+ avatar_model="humanlike-homo",
15
+ prompt="warm, friendly, subtly smiling, occasional nods",
16
+ )
17
+ await avatar.start(agent_session, room=ctx.room)
18
+ """
19
+
20
+ from .avatar import AvatarSession, VideoGenerator
21
+
22
+ __all__ = ["AvatarSession", "VideoGenerator"]
@@ -0,0 +1,399 @@
1
+ """
2
+ Humanlike avatar — LiveKit Agents VideoGenerator + AvatarSession.
3
+
4
+ Connects to the avatar orchestrator WebSocket and streams audio in / video
5
+ frames out, with an expression prompt that guides facial behaviour.
6
+
7
+ Orchestrator WS protocol (extended):
8
+ 1. Client sends JSON config:
9
+ {
10
+ "model": "humanlike-homo",
11
+ "image": "<base64>",
12
+ "seed": 42,
13
+ "prompt": "warm, friendly, subtly smiling"
14
+ }
15
+ 2. Server responds: {"status": "ready", "fps": 25, "chunk_samples": N, ...}
16
+ 3. Client streams binary PCM int16 LE 16 kHz mono chunks
17
+ 4. Server streams binary: [4-byte frame_idx LE uint32][JPEG bytes]
18
+ 5. Client sends empty bytes to end the stream
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import asyncio
24
+ import base64
25
+ import json
26
+ import logging
27
+ from pathlib import Path
28
+
29
+ import numpy as np
30
+ import websockets
31
+
32
+ from livekit import rtc
33
+ from livekit.agents.voice import avatar as av
34
+
35
+ logger = logging.getLogger("humanlike-avatar")
36
+
37
+ PLUGIN_NAME = "humanlike"
38
+ AVATAR_IDENTITY = "humanlike-avatar"
39
+ TARGET_SR = 16000
40
+
41
+
42
+ def _resample(samples: np.ndarray, src_sr: int, dst_sr: int) -> np.ndarray:
43
+ if src_sr == dst_sr:
44
+ return samples
45
+ try:
46
+ import soxr
47
+ return soxr.resample(samples, src_sr, dst_sr).astype(np.float32)
48
+ except ImportError:
49
+ # Fallback: linear interpolation (lower quality but no extra dep)
50
+ ratio = dst_sr / src_sr
51
+ n_out = int(len(samples) * ratio)
52
+ indices = np.linspace(0, len(samples) - 1, n_out)
53
+ return np.interp(indices, np.arange(len(samples)), samples).astype(np.float32)
54
+
55
+
56
def _jpeg_to_video_frame(jpeg_bytes: bytes) -> rtc.VideoFrame:
    """Decode JPEG bytes into an RGBA ``rtc.VideoFrame``.

    Prefers PyTurboJPEG for fast native decoding on the per-frame hot path,
    falling back to Pillow when it is not installed. The ``TurboJPEG``
    decoder is created once and cached on the function — the original code
    constructed it on every call, which reloads the native library for each
    decoded frame.

    Parameters
    ----------
    jpeg_bytes : bytes
        Encoded JPEG image data.

    Returns
    -------
    rtc.VideoFrame
        RGBA frame with the image's own width/height.
    """
    try:
        from turbojpeg import TurboJPEG, TJPF_RGBA
        tj = getattr(_jpeg_to_video_frame, "_tj", None)
        if tj is None:
            # Cache the decoder across calls; constructing TurboJPEG()
            # loads the native libjpeg-turbo library each time.
            tj = TurboJPEG()
            _jpeg_to_video_frame._tj = tj
        rgba = tj.decode(jpeg_bytes, pixel_format=TJPF_RGBA)
    except ImportError:
        from PIL import Image
        import io
        img = Image.open(io.BytesIO(jpeg_bytes)).convert("RGBA")
        rgba = np.array(img)

    h, w = rgba.shape[:2]
    return rtc.VideoFrame(
        width=w,
        height=h,
        type=rtc.VideoBufferType.RGBA,
        data=bytes(rgba),
    )
75
+
76
+
77
class VideoGenerator(av.VideoGenerator):
    """
    LiveKit VideoGenerator that streams audio to the Humanlike avatar
    orchestrator over a WebSocket and yields decoded video frames.

    Lifecycle: construct, then ``await connect()`` (handshake + background
    receive loop + idle-frame loop), then feed audio via ``push_audio`` and
    consume frames by async-iterating the instance. ``aclose()`` tears
    everything down.

    Parameters
    ----------
    ws_url : str
        WebSocket URL of the avatar orchestrator.
    image : bytes
        Reference face image (PNG/JPEG bytes).
    avatar_model : str
        Avatar model identifier, e.g. "humanlike-homo".
    prompt : str
        Natural-language expression prompt that guides facial behaviour
        during generation, e.g. "expressive, warm smile, occasional nods".
    seed : int
        Random seed for reproducible generation.
    """

    def __init__(
        self,
        ws_url: str,
        image: bytes,
        avatar_model: str = "humanlike-homo",
        prompt: str = "",
        seed: int = 42,
    ) -> None:
        self._ws_url = ws_url
        # Image is kept in two forms: base64 for the JSON config handshake,
        # raw bytes for building the local idle frame.
        self._image_b64 = base64.b64encode(image).decode()
        self._image_bytes = image
        self._avatar_model = avatar_model
        self._prompt = prompt
        self._seed = seed

        self._ws: websockets.WebSocketClientProtocol | None = None
        # Defaults are overwritten by the server's "ready" message in connect().
        self._fps: float = 25.0
        self._chunk_samples: int = 0

        # Accumulates resampled float32 PCM until a full chunk can be sent.
        self._audio_buf = np.array([], dtype=np.float32)
        # Output queue carries rtc.VideoFrame items and AudioSegmentEnd markers.
        self._out_q: asyncio.Queue = asyncio.Queue()
        self._recv_task: asyncio.Task | None = None
        self._idle_task: asyncio.Task | None = None
        self._idle_frame: rtc.VideoFrame | None = None
        # Flips to True on the first audio chunk sent; stops the idle loop.
        self._generating = False

    # ------------------------------------------------------------------
    # Setup
    # ------------------------------------------------------------------

    async def connect(self) -> None:
        """Open WS, perform handshake, start background recv loop.

        Sends the JSON config (model, base64 image, seed, prompt), blocks
        until the server replies ``{"status": "ready", ...}`` (capturing the
        negotiated fps and chunk size), then starts the frame-receive and
        idle-frame background tasks.

        Raises
        ------
        RuntimeError
            If the server responds with an ``error`` message instead of ready.
        """
        # max_size raised to 10 MiB: decoded protocol messages carry whole
        # JPEG frames, which can exceed the websockets default of 1 MiB.
        self._ws = await websockets.connect(self._ws_url, max_size=10 * 1024 * 1024)

        config = {
            "model": self._avatar_model,
            "image": self._image_b64,
            "seed": self._seed,
            "prompt": self._prompt,
        }
        await self._ws.send(json.dumps(config))
        logger.info(
            "Sent config: model=%s prompt=%r seed=%d",
            self._avatar_model, self._prompt[:60], self._seed,
        )

        # Wait for ready
        while True:
            raw = await self._ws.recv()
            # Text frames are JSON control messages; binary frames (video)
            # are ignored until the handshake completes.
            if isinstance(raw, str):
                msg = json.loads(raw)
                if msg.get("status") == "ready":
                    self._fps = float(msg.get("fps", 25))
                    # NOTE(review): if the server omits chunk_samples this
                    # stays 0, and push_audio will buffer without ever
                    # sending — confirm the orchestrator always provides it.
                    self._chunk_samples = int(msg.get("chunk_samples", 0))
                    logger.info(
                        "Orchestrator ready: model=%s fps=%.0f chunk_samples=%d",
                        self._avatar_model, self._fps, self._chunk_samples,
                    )
                    break
                if "error" in msg:
                    raise RuntimeError(f"Orchestrator error: {msg['error']}")

        self._recv_task = asyncio.ensure_future(self._recv_loop())

        # Build idle frame from reference image
        self._idle_frame = _jpeg_to_video_frame(self._image_bytes)
        self._idle_task = asyncio.ensure_future(self._idle_loop())

    async def _idle_loop(self) -> None:
        """Push the reference image at fps until the first real chunk arrives."""
        interval = 1.0 / self._fps
        # Runs until cancelled by _send_chunk (first real audio) or aclose().
        while not self._generating and self._idle_frame is not None:
            await self._out_q.put(self._idle_frame)
            await asyncio.sleep(interval)

    async def _recv_loop(self) -> None:
        """Background: read JPEG frames from WS, enqueue as VideoFrames.

        Protocol: each binary message is [4-byte frame_idx LE uint32][JPEG].
        Non-binary or undersized messages are skipped; decode failures are
        logged and dropped so one bad frame doesn't kill the stream.
        """
        try:
            assert self._ws is not None
            async for message in self._ws:
                if not isinstance(message, bytes) or len(message) < 4:
                    continue
                jpeg_data = message[4:]  # strip 4-byte frame_idx header
                try:
                    frame = _jpeg_to_video_frame(jpeg_data)
                    await self._out_q.put(frame)
                except Exception as exc:
                    logger.warning("Failed to decode frame: %s", exc)
        except websockets.ConnectionClosed:
            # Normal shutdown path when the orchestrator closes the socket.
            logger.info("Orchestrator WS closed")
        except Exception as exc:
            logger.exception("Recv loop error: %s", exc)

    # ------------------------------------------------------------------
    # VideoGenerator protocol
    # ------------------------------------------------------------------

    async def push_audio(self, frame: rtc.AudioFrame | av.AudioSegmentEnd) -> None:
        """Accept TTS audio, convert to 16 kHz mono float32, send in chunks.

        An AudioSegmentEnd marker flushes the remaining buffer to the server
        and is forwarded downstream through the output queue.
        """
        if isinstance(frame, av.AudioSegmentEnd):
            await self._flush_audio()
            await self._out_q.put(av.AudioSegmentEnd())
            return

        # int16 PCM -> float32 in [-1, 1); downmix interleaved channels.
        pcm_f32 = np.frombuffer(frame.data, dtype=np.int16).astype(np.float32) / 32768.0
        if frame.num_channels > 1:
            pcm_f32 = pcm_f32.reshape(-1, frame.num_channels).mean(axis=1)

        pcm_f32 = _resample(pcm_f32, frame.sample_rate, TARGET_SR)
        self._audio_buf = np.concatenate([self._audio_buf, pcm_f32])

        # Drain complete chunks; leftover samples stay buffered for the next
        # push or the final flush.
        while self._chunk_samples > 0 and len(self._audio_buf) >= self._chunk_samples:
            chunk = self._audio_buf[: self._chunk_samples]
            self._audio_buf = self._audio_buf[self._chunk_samples :]
            await self._send_chunk(chunk)

    def clear_buffer(self) -> None:
        """Drop buffered audio and any queued output (used on interruption)."""
        self._audio_buf = np.array([], dtype=np.float32)
        # Best-effort drain; QueueEmpty can still race with the recv loop.
        while not self._out_q.empty():
            try:
                self._out_q.get_nowait()
            except asyncio.QueueEmpty:
                break

    def __aiter__(self):
        # Async-iterating the generator yields frames/markers from the queue.
        return self._output_stream()

    async def _output_stream(self):
        # Infinite stream: termination is driven by task cancellation in
        # aclose(), not by exhausting the queue.
        while True:
            item = await self._out_q.get()
            yield item

    # ------------------------------------------------------------------
    # Prompt update (live)
    # ------------------------------------------------------------------

    async def set_prompt(self, prompt: str) -> None:
        """Update the expression prompt mid-session (if orchestrator supports it)."""
        self._prompt = prompt
        if self._ws is not None:
            try:
                await self._ws.send(json.dumps({"update_prompt": prompt}))
                logger.info("Expression prompt updated: %r", prompt[:60])
            except Exception as exc:
                # Best-effort: a failed update leaves the old prompt active
                # server-side but should not crash the session.
                logger.warning("Failed to send prompt update: %s", exc)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    async def _flush_audio(self) -> None:
        """Send any buffered tail (zero-padded to a full chunk), then the
        empty-bytes end-of-stream marker per the orchestrator protocol."""
        if self._chunk_samples > 0 and len(self._audio_buf) > 0:
            chunk = self._audio_buf
            self._audio_buf = np.array([], dtype=np.float32)
            if len(chunk) < self._chunk_samples:
                # Pad the final partial chunk with silence.
                chunk = np.pad(chunk, (0, self._chunk_samples - len(chunk)))
            await self._send_chunk(chunk)
        if self._ws is not None:
            try:
                # Empty binary message signals end of the audio stream.
                await self._ws.send(b"")
            except Exception:
                pass

    async def _send_chunk(self, chunk: np.ndarray) -> None:
        """Convert a float32 chunk back to int16 PCM and send it binary."""
        if self._ws is None:
            return
        if not self._generating:
            # First real audio: stop pushing idle frames.
            self._generating = True
            if self._idle_task is not None:
                self._idle_task.cancel()
                self._idle_task = None
        pcm_int16 = (np.clip(chunk, -1.0, 1.0) * 32767).astype(np.int16)
        await self._ws.send(pcm_int16.tobytes())

    async def aclose(self) -> None:
        """Cancel background tasks and close the WebSocket."""
        if self._idle_task is not None:
            self._idle_task.cancel()
        if self._recv_task is not None:
            self._recv_task.cancel()
        if self._ws is not None:
            await self._ws.close()

    @property
    def fps(self) -> float:
        # Frame rate negotiated with the orchestrator (default 25 until connect()).
        return self._fps
281
+
282
+
283
class AvatarSession:
    """
    High-level Humanlike avatar session for LiveKit Agents.

    Glues a :class:`VideoGenerator` into a LiveKit room: agent TTS audio is
    routed to the avatar participant over a DataStream, and the generated
    video is published by LiveKit's AvatarRunner.

    Parameters
    ----------
    orchestrator_url : str
        WebSocket URL, e.g. "ws://localhost:8000/ws/stream".
    image : str | bytes
        Path to a face image, or raw image bytes.
    avatar_model : str
        Model identifier. Default "humanlike-homo".
    prompt : str
        Expression guidance prompt, e.g. "warm, friendly, subtly smiling".
        Can be updated live via set_prompt().
    seed : int
        Random seed.
    video_width / video_height : int
        Output dimensions (should match orchestrator output, default 512).
    avatar_identity : str
        LiveKit participant identity for the avatar.
    tts_sample_rate : int
        TTS output sample rate (default 16000 to match pipeline).
    """

    def __init__(
        self,
        orchestrator_url: str = "ws://localhost:8000/ws/stream",
        image: str | bytes = "./face.png",
        avatar_model: str = "humanlike-homo",
        prompt: str = "",
        seed: int = 42,
        video_width: int = 512,
        video_height: int = 512,
        avatar_identity: str = AVATAR_IDENTITY,
        tts_sample_rate: int = TARGET_SR,
    ) -> None:
        # Pure configuration capture — no I/O happens until start().
        self._orchestrator_url = orchestrator_url
        self._image = image
        self._avatar_model = avatar_model
        self._prompt = prompt
        self._seed = seed
        self._video_width = video_width
        self._video_height = video_height
        self._avatar_identity = avatar_identity
        self._tts_sample_rate = tts_sample_rate
        self._runner: av.AvatarRunner | None = None
        self._gen: VideoGenerator | None = None

    def _load_image(self) -> bytes:
        """Return the face image as raw bytes, reading it from disk when a
        path was supplied instead of bytes."""
        source = self._image
        return source if isinstance(source, bytes) else Path(source).read_bytes()

    async def start(
        self,
        agent_session,
        room: rtc.Room,
    ) -> None:
        """
        Connect to the orchestrator and wire up the LiveKit room.
        Must be called before agent_session.start().
        """
        generator = VideoGenerator(
            ws_url=self._orchestrator_url,
            image=self._load_image(),
            avatar_model=self._avatar_model,
            prompt=self._prompt,
            seed=self._seed,
        )
        await generator.connect()
        self._gen = generator

        # Avatar side: receive agent audio over the room's DataStream.
        audio_receiver = av.DataStreamAudioReceiver(room)

        # Agent side: redirect TTS output into the DataStream instead of
        # publishing it directly to the room.
        agent_session.output.audio = av.DataStreamAudioOutput(
            room=room,
            destination_identity=self._avatar_identity,
            sample_rate=self._tts_sample_rate,
        )

        runner_options = av.AvatarOptions(
            video_width=self._video_width,
            video_height=self._video_height,
            video_fps=generator.fps,
            audio_sample_rate=self._tts_sample_rate,
            audio_channels=1,
        )

        self._runner = av.AvatarRunner(
            room=room,
            audio_recv=audio_receiver,
            video_gen=generator,
            avatar_opts=runner_options,
        )
        await self._runner.start()
        logger.info(
            "Humanlike avatar started: model=%s prompt=%r identity=%s",
            self._avatar_model, self._prompt[:60], self._avatar_identity,
        )

    async def set_prompt(self, prompt: str) -> None:
        """Update the expression prompt on a live session."""
        self._prompt = prompt
        generator = self._gen
        if generator is not None:
            await generator.set_prompt(prompt)

    async def aclose(self) -> None:
        """Shut down the runner and the generator, if they were started."""
        if self._runner is not None:
            await self._runner.aclose()
        if self._gen is not None:
            await self._gen.aclose()
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: livekit-plugins-humanlike
3
+ Version: 0.1.0
4
+ Summary: Humanlike avatar plugin for LiveKit Agents — real-time talking-head video with expression control
5
+ Author: Humanlike AI
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/HumanlikeAI/livekit-plugins-humanlike
8
+ Project-URL: Repository, https://github.com/HumanlikeAI/livekit-plugins-humanlike
9
+ Keywords: livekit,avatar,talking-head,real-time,humanlike
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Multimedia :: Video
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: livekit-agents>=1.0
21
+ Requires-Dist: numpy
22
+ Requires-Dist: websockets
23
+ Requires-Dist: Pillow
24
+ Provides-Extra: fast
25
+ Requires-Dist: turbojpeg; extra == "fast"
26
+ Requires-Dist: soxr; extra == "fast"
27
+
28
+ # livekit-plugins-humanlike
29
+
30
+ Real-time talking-head avatar plugin for [LiveKit Agents](https://docs.livekit.io/agents/).
31
+
32
+ Streams TTS audio to a GPU-backed avatar orchestrator that returns lip-synced video frames with facial expressions guided by a natural-language prompt.
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install livekit-plugins-humanlike
38
+ ```
39
+
40
+ ## Quick start
41
+
42
+ ```python
43
+ from livekit.plugins.humanlike import AvatarSession
44
+
45
+ avatar = AvatarSession(
46
+ orchestrator_url="ws://your-gpu-server:8000/ws/stream",
47
+ image="/path/to/face.png",
48
+ avatar_model="humanlike-homo",
49
+ prompt="warm, friendly, subtly smiling, occasional nods",
50
+ )
51
+
52
+ # Wire into your LiveKit agent
53
+ await avatar.start(agent_session, room=ctx.room)
54
+ ```
55
+
56
+ ## Parameters
57
+
58
+ | Parameter | Default | Description |
59
+ |-----------|---------|-------------|
60
+ | `orchestrator_url` | `ws://localhost:8000/ws/stream` | Avatar server WebSocket URL |
61
+ | `image` | `./face.png` | Face image (file path or raw bytes) |
62
+ | `avatar_model` | `humanlike-homo` | Model identifier |
63
+ | `prompt` | `""` | Expression prompt (e.g. "warm, smiling, nods") |
64
+ | `seed` | `42` | Random seed for reproducibility |
65
+ | `video_width` | `512` | Output video width |
66
+ | `video_height` | `512` | Output video height |
67
+ | `tts_sample_rate` | `16000` | Must match your TTS output rate |
68
+
69
+ ## Live expression updates
70
+
71
+ Update the avatar's expression mid-conversation:
72
+
73
+ ```python
74
+ await avatar.set_prompt("excited, wide eyes, big smile")
75
+ ```
76
+
77
+ ## Full agent example
78
+
79
+ ```python
80
+ from livekit.agents import Agent, AgentSession, JobContext, RoomOutputOptions, WorkerOptions, cli
81
+ from livekit.plugins import cartesia, openai, silero
82
+ from livekit.plugins.humanlike import AvatarSession
83
+
84
+ async def entrypoint(ctx: JobContext):
85
+ await ctx.connect()
86
+
87
+ session = AgentSession(
88
+ stt=openai.STT(),
89
+ llm=openai.LLM(model="gpt-4o"),
90
+ tts=cartesia.TTS(model="sonic-3", sample_rate=16000),
91
+ vad=silero.VAD.load(),
92
+ )
93
+
94
+ avatar = AvatarSession(
95
+ orchestrator_url="ws://localhost:8000/ws/stream",
96
+ image="./face.png",
97
+ avatar_model="humanlike-homo",
98
+ prompt="warm, friendly, natural eye movement",
99
+ )
100
+ await avatar.start(session, room=ctx.room)
101
+
102
+ await session.start(
103
+ room=ctx.room,
104
+ agent=Agent(instructions="You are a helpful assistant."),
105
+ room_output_options=RoomOutputOptions(audio_enabled=False),
106
+ )
107
+
108
+ if __name__ == "__main__":
109
+ cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
110
+ ```
@@ -0,0 +1,11 @@
1
+ README.md
2
+ pyproject.toml
3
+ livekit/__init__.py
4
+ livekit/plugins/__init__.py
5
+ livekit/plugins/humanlike/__init__.py
6
+ livekit/plugins/humanlike/avatar.py
7
+ livekit_plugins_humanlike.egg-info/PKG-INFO
8
+ livekit_plugins_humanlike.egg-info/SOURCES.txt
9
+ livekit_plugins_humanlike.egg-info/dependency_links.txt
10
+ livekit_plugins_humanlike.egg-info/requires.txt
11
+ livekit_plugins_humanlike.egg-info/top_level.txt
@@ -0,0 +1,8 @@
1
+ livekit-agents>=1.0
2
+ numpy
3
+ websockets
4
+ Pillow
5
+
6
+ [fast]
7
+ turbojpeg
8
+ soxr
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "livekit-plugins-humanlike"
7
+ version = "0.1.0"
8
+ description = "Humanlike avatar plugin for LiveKit Agents — real-time talking-head video with expression control"
9
+ readme = "README.md"
10
+ license = "Apache-2.0"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Humanlike AI" },
14
+ ]
15
+ keywords = ["livekit", "avatar", "talking-head", "real-time", "humanlike"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Topic :: Multimedia :: Video",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ ]
26
+ dependencies = [
27
+ "livekit-agents>=1.0",
28
+ "numpy",
29
+ "websockets",
30
+ "Pillow",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ fast = ["turbojpeg", "soxr"]
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/HumanlikeAI/livekit-plugins-humanlike"
38
+ Repository = "https://github.com/HumanlikeAI/livekit-plugins-humanlike"
39
+
40
+ [tool.setuptools.packages.find]
41
+ include = ["livekit*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+