bithuman-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bithuman/__init__.py +13 -0
- bithuman/_version.py +1 -0
- bithuman/api.py +164 -0
- bithuman/audio/__init__.py +19 -0
- bithuman/audio/audio.py +396 -0
- bithuman/audio/hparams.py +108 -0
- bithuman/audio/utils.py +255 -0
- bithuman/config.py +88 -0
- bithuman/engine/__init__.py +15 -0
- bithuman/engine/auth.py +335 -0
- bithuman/engine/compression.py +257 -0
- bithuman/engine/enums.py +16 -0
- bithuman/engine/image_ops.py +192 -0
- bithuman/engine/inference.py +108 -0
- bithuman/engine/knn.py +58 -0
- bithuman/engine/video_data.py +391 -0
- bithuman/engine/video_reader.py +168 -0
- bithuman/lib/__init__.py +1 -0
- bithuman/lib/audio_encoder.onnx +45631 -28
- bithuman/lib/generator.py +763 -0
- bithuman/lib/pth2h5.py +106 -0
- bithuman/plugins/__init__.py +0 -0
- bithuman/plugins/stt.py +185 -0
- bithuman/runtime.py +1004 -0
- bithuman/runtime_async.py +469 -0
- bithuman/service/__init__.py +9 -0
- bithuman/service/client.py +788 -0
- bithuman/service/messages.py +210 -0
- bithuman/service/server.py +759 -0
- bithuman/utils/__init__.py +43 -0
- bithuman/utils/agent.py +359 -0
- bithuman/utils/fps_controller.py +90 -0
- bithuman/utils/image.py +41 -0
- bithuman/utils/unzip.py +38 -0
- bithuman/video_graph/__init__.py +16 -0
- bithuman/video_graph/action_trigger.py +83 -0
- bithuman/video_graph/driver_video.py +482 -0
- bithuman/video_graph/navigator.py +736 -0
- bithuman/video_graph/trigger.py +90 -0
- bithuman/video_graph/video_script.py +344 -0
- bithuman-1.0.2.dist-info/METADATA +37 -0
- bithuman-1.0.2.dist-info/RECORD +44 -0
- bithuman-1.0.2.dist-info/WHEEL +5 -0
- bithuman-1.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,210 @@
+"""Message definitions for bithuman runtime service."""
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass, field
+from enum import Enum
+from functools import cached_property
+from typing import Any, Optional
+
+import numpy as np
+
+from bithuman.api import AudioChunk, VideoControl
+from bithuman.utils.image import decode_image, encode_image
+
+
+class CommandType(str, Enum):
+    """Types of commands that can be sent to the server."""
+
+    INIT = "init"
+    AUDIO = "audio"
+    HEARTBEAT = "heartbeat"
+    INTERRUPT = "interrupt"
+    CHECK_INIT_STATUS = "check_init_status"  # Add new command type
+    GET_SETTING = "get_setting"
+
+
+class ResponseStatus(str, Enum):
+    """Possible response statuses."""
+
+    SUCCESS = "success"
+    ERROR = "error"
+    LOADING = "loading"  # Add new status for async initialization
+
+
+@dataclass(kw_only=True)
+class BaseRequest:
+    """Base class for all requests."""
+
+    client_id: str
+    command: CommandType
+
+    def to_dict(self) -> dict:
+        """Convert request to dictionary format."""
+        return asdict(self)
+
+
+@dataclass(kw_only=True)
+class InitRequest(BaseRequest):
+    """Request to initialize a client workspace."""
+
+    avatar_model_path: str
+    video_file: Optional[str] = None
+    inference_data_file: Optional[str] = None
+    command: CommandType = CommandType.INIT
+
+
+@dataclass(kw_only=True)
+class AudioRequest(BaseRequest):
+    """Request to process audio data."""
+
+    data: VideoControl
+    command: CommandType = CommandType.AUDIO
+
+    def __post_init__(self) -> None:
+        """Post initialization."""
+        if isinstance(self.data, dict):
+            self.data = VideoControl(**self.data)
+
+    def to_dict(self) -> dict:
+        """Convert request to dictionary format."""
+        request_dict = asdict(self)
+        # Use numpy's more efficient serialization
+        if self.data.audio is not None:
+            audio_dict = asdict(self.data.audio)
+            del audio_dict["data"]
+            audio_dict["audio_bytes"] = self.data.audio.bytes
+            request_dict["data"]["audio"] = audio_dict
+        return request_dict
+
+    @classmethod
+    def from_dict(cls, msg: dict) -> "AudioRequest":
+        """Create an AudioRequest from a dictionary."""
+        request = cls(**msg)
+        if request.data.audio is not None:
+            request.data.audio = AudioChunk.from_bytes(**request.data.audio)
+
+        return request
+
+    def __repr__(self) -> str:
+        """String representation of the AudioRequest."""
+        data_dict = self.to_dict()["data"]
+        data_dict.pop("audio_fp32")
+        data_dict["audio_duration"] = (
+            (len(self.data.audio_fp32) / self.data.audio_sample_rate)
+            if self.data.audio_fp32 is not None
+            else None
+        )
+        return f"AudioRequest(data={data_dict})"
+
+    @property
+    def audio_bytes(self) -> Optional[bytes]:
+        """Get the audio data as bytes."""
+        if self.data.audio_fp32 is None:
+            return None
+        return self.data.audio_fp32.tobytes()
+
+
+@dataclass(kw_only=True)
+class HeartbeatRequest(BaseRequest):
+    """Heartbeat request to keep connection alive."""
+
+    command: CommandType = CommandType.HEARTBEAT
+
+
+@dataclass(kw_only=True)
+class InterruptRequest(BaseRequest):
+    """Request to interrupt current audio processing."""
+
+    command: CommandType = CommandType.INTERRUPT
+
+
+@dataclass(kw_only=True)
+class CheckInitStatusRequest(BaseRequest):
+    """Request to check initialization status."""
+
+    command: CommandType = CommandType.CHECK_INIT_STATUS
+
+
+@dataclass(kw_only=True)
+class GetSettingRequest(BaseRequest):
+    """Request to get the current settings."""
+
+    command: CommandType = CommandType.GET_SETTING
+    name: str
+
+
+@dataclass
+class ServerResponse:
+    """Generic server response."""
+
+    status: ResponseStatus
+    message: Optional[str] = None
+    extra: Optional[dict] = None
+
+    @classmethod
+    def from_dict(cls, response_dict: dict) -> "ServerResponse":
+        """Create a ServerResponse from a dictionary."""
+        return ServerResponse(
+            status=ResponseStatus(response_dict["status"]),
+            message=response_dict.get("message"),
+            extra=response_dict.get("extra"),
+        )
+
+    def to_dict(self) -> dict:
+        """Convert response to dictionary format."""
+        return asdict(self)
+
+
+@dataclass
+class FrameMessage:
+    """Frame data sent from server to client."""
+
+    client_id: str
+    frame_data: bytes  # JPEG encoded image data
+    frame_index: Optional[int]
+    source_message_id: str
+    end_of_speech: bool  # mark the end of the speech
+    audio_bytes: Optional[bytes] = None  # Audio chunk data
+    sample_rate: Optional[int] = None  # Audio sample rate
+    metadata: dict = field(default_factory=dict)  # For additional frame info
+
+    def to_dict(self) -> dict:
+        """Convert response to dictionary format."""
+        return asdict(self)
+
+    @classmethod
+    def create(
+        cls,
+        client_id: str,
+        frame_image: np.ndarray,
+        frame_index: Optional[int],
+        end_of_speech: bool,
+        audio_bytes: Optional[bytes] = None,
+        sample_rate: Optional[int] = None,
+        source_message_id: Optional[str] = None,
+        **kwargs: dict[str, Any],
+    ) -> "FrameMessage":
+        """Create a frame message from frame data."""
+        if frame_image is not None:
+            frame_image = encode_image(frame_image)
+
+        return FrameMessage(
+            client_id=client_id,
+            frame_data=frame_image,
+            frame_index=frame_index,
+            source_message_id=source_message_id,
+            end_of_speech=end_of_speech,
+            audio_bytes=audio_bytes,
+            sample_rate=sample_rate,
+            metadata=kwargs,
+        )
+
+    @cached_property
+    def image(self) -> np.ndarray:
+        """Get the image as a numpy array."""
+        return decode_image(self.frame_data)
+
+    @property
+    def has_audio(self) -> bool:
+        """Check if frame has valid audio data."""
+        return bool(self.audio_bytes and self.sample_rate)
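
For orientation, below is a minimal usage sketch of the message classes added in this file. It only exercises the API visible in the diff above; it assumes the bithuman wheel is installed and that bithuman.utils.image.encode_image (not shown in this file) accepts a plain HxWx3 uint8 array. The client ID, message ID, and dummy image are illustrative placeholders, not values the package requires.

import numpy as np

from bithuman.service.messages import (
    FrameMessage,
    HeartbeatRequest,
    ResponseStatus,
    ServerResponse,
)

# Keep-alive request, serialized to a plain dict for transport.
heartbeat = HeartbeatRequest(client_id="client-1")
payload = heartbeat.to_dict()  # "command" stays a CommandType, which is also the str "heartbeat"

# Parse a response received from the server as a dict.
response = ServerResponse.from_dict({"status": "success", "message": "ok"})
assert response.status is ResponseStatus.SUCCESS

# Package a rendered frame (a dummy black image here) for delivery to the client.
frame = FrameMessage.create(
    client_id="client-1",
    frame_image=np.zeros((64, 64, 3), dtype=np.uint8),  # encoded to JPEG bytes by create()
    frame_index=0,
    end_of_speech=False,
    source_message_id="msg-001",
)
assert frame.has_audio is False  # no audio_bytes/sample_rate attached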