bithuman 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. bithuman/__init__.py +13 -0
  2. bithuman/_version.py +1 -0
  3. bithuman/api.py +164 -0
  4. bithuman/audio/__init__.py +19 -0
  5. bithuman/audio/audio.py +396 -0
  6. bithuman/audio/hparams.py +108 -0
  7. bithuman/audio/utils.py +255 -0
  8. bithuman/config.py +88 -0
  9. bithuman/engine/__init__.py +15 -0
  10. bithuman/engine/auth.py +335 -0
  11. bithuman/engine/compression.py +257 -0
  12. bithuman/engine/enums.py +16 -0
  13. bithuman/engine/image_ops.py +192 -0
  14. bithuman/engine/inference.py +108 -0
  15. bithuman/engine/knn.py +58 -0
  16. bithuman/engine/video_data.py +391 -0
  17. bithuman/engine/video_reader.py +168 -0
  18. bithuman/lib/__init__.py +1 -0
  19. bithuman/lib/audio_encoder.onnx +45631 -28
  20. bithuman/lib/generator.py +763 -0
  21. bithuman/lib/pth2h5.py +106 -0
  22. bithuman/plugins/__init__.py +0 -0
  23. bithuman/plugins/stt.py +185 -0
  24. bithuman/runtime.py +1004 -0
  25. bithuman/runtime_async.py +469 -0
  26. bithuman/service/__init__.py +9 -0
  27. bithuman/service/client.py +788 -0
  28. bithuman/service/messages.py +210 -0
  29. bithuman/service/server.py +759 -0
  30. bithuman/utils/__init__.py +43 -0
  31. bithuman/utils/agent.py +359 -0
  32. bithuman/utils/fps_controller.py +90 -0
  33. bithuman/utils/image.py +41 -0
  34. bithuman/utils/unzip.py +38 -0
  35. bithuman/video_graph/__init__.py +16 -0
  36. bithuman/video_graph/action_trigger.py +83 -0
  37. bithuman/video_graph/driver_video.py +482 -0
  38. bithuman/video_graph/navigator.py +736 -0
  39. bithuman/video_graph/trigger.py +90 -0
  40. bithuman/video_graph/video_script.py +344 -0
  41. bithuman-1.0.2.dist-info/METADATA +37 -0
  42. bithuman-1.0.2.dist-info/RECORD +44 -0
  43. bithuman-1.0.2.dist-info/WHEEL +5 -0
  44. bithuman-1.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,210 @@
1
+ """Message definitions for bithuman runtime service."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import asdict, dataclass, field
5
+ from enum import Enum
6
+ from functools import cached_property
7
+ from typing import Any, Optional
8
+
9
+ import numpy as np
10
+
11
+ from bithuman.api import AudioChunk, VideoControl
12
+ from bithuman.utils.image import decode_image, encode_image
13
+
14
+
15
class CommandType(str, Enum):
    """Commands a client may send to the runtime server.

    Every member is also a ``str``, so it compares equal to its wire value
    (for example ``CommandType.INIT == "init"``).
    """

    INIT = "init"  # initialize a client workspace
    AUDIO = "audio"  # process audio data
    HEARTBEAT = "heartbeat"  # keep the connection alive
    INTERRUPT = "interrupt"  # interrupt current audio processing
    CHECK_INIT_STATUS = "check_init_status"  # check initialization status
    GET_SETTING = "get_setting"  # get the current settings
24
+
25
+
26
class ResponseStatus(str, Enum):
    """Status values a server response can carry.

    Every member is also a ``str``, so it compares equal to its wire value.
    """

    SUCCESS = "success"
    ERROR = "error"
    LOADING = "loading"  # used for async initialization
32
+
33
+
34
+ @dataclass(kw_only=True)
35
+ class BaseRequest:
36
+ """Base class for all requests."""
37
+
38
+ client_id: str
39
+ command: CommandType
40
+
41
+ def to_dict(self) -> dict:
42
+ """Convert request to dictionary format."""
43
+ return asdict(self)
44
+
45
+
46
@dataclass(kw_only=True)
class InitRequest(BaseRequest):
    """Request asking the server to initialize a client workspace.

    Only ``avatar_model_path`` is required; the two file fields default to
    ``None`` and ``command`` defaults to ``CommandType.INIT``.
    """

    avatar_model_path: str
    video_file: Optional[str] = None
    inference_data_file: Optional[str] = None
    command: CommandType = CommandType.INIT
54
+
55
+
56
@dataclass(kw_only=True)
class AudioRequest(BaseRequest):
    """Request to process audio data.

    ``data`` may be supplied either as a ``VideoControl`` or as a plain dict
    of its constructor arguments; a dict is converted in ``__post_init__``.
    """

    data: VideoControl
    command: CommandType = CommandType.AUDIO

    def __post_init__(self) -> None:
        """Coerce a dict ``data`` payload into a ``VideoControl``."""
        if isinstance(self.data, dict):
            self.data = VideoControl(**self.data)

    def to_dict(self) -> dict:
        """Convert the request to dictionary format.

        When ``data.audio`` is set, its ``data`` field is removed from the
        resulting dict and the chunk's ``bytes`` value is stored under
        ``"audio_bytes"`` instead, alongside the chunk's remaining fields.
        """
        request_dict = asdict(self)
        audio = self.data.audio
        if audio is not None:
            # Ship the chunk as raw bytes rather than as its ``data`` field.
            audio_dict = asdict(audio)
            del audio_dict["data"]
            audio_dict["audio_bytes"] = audio.bytes
            request_dict["data"]["audio"] = audio_dict
        return request_dict

    @classmethod
    def from_dict(cls, msg: dict) -> "AudioRequest":
        """Create an AudioRequest from a dictionary.

        ``msg`` is expanded into the constructor. If the resulting
        ``data.audio`` is a serialized mapping (as produced by ``to_dict``),
        it is rebuilt with ``AudioChunk.from_bytes``; an ``AudioChunk`` that
        is already constructed is left untouched.
        """
        request = cls(**msg)
        audio = request.data.audio
        # Unpacking an existing AudioChunk with ``**`` would raise TypeError,
        # so only decode when the value still needs to be rebuilt.
        if audio is not None and not isinstance(audio, AudioChunk):
            request.data.audio = AudioChunk.from_bytes(**audio)

        return request

    def __repr__(self) -> str:
        """String representation of the AudioRequest."""
        data_dict = self.to_dict()["data"]
        # NOTE(review): ``audio_fp32`` / ``audio_sample_rate`` are not used by
        # to_dict()/from_dict(), which work with ``data.audio``. VideoControl
        # is defined elsewhere, so whether these fields still exist should be
        # confirmed. They are read defensively here so repr() never raises.
        data_dict.pop("audio_fp32", None)
        samples = getattr(self.data, "audio_fp32", None)
        sample_rate = getattr(self.data, "audio_sample_rate", None)
        data_dict["audio_duration"] = (
            len(samples) / sample_rate
            if samples is not None and sample_rate
            else None
        )
        return f"AudioRequest(data={data_dict})"

    @property
    def audio_bytes(self) -> Optional[bytes]:
        """Get the audio data as bytes.

        Returns ``None`` when ``data.audio_fp32`` is ``None``.
        """
        # NOTE(review): reads ``data.audio_fp32`` while to_dict() reads
        # ``data.audio`` - confirm VideoControl still exposes this field.
        if self.data.audio_fp32 is None:
            return None
        return self.data.audio_fp32.tobytes()
105
+
106
+
107
@dataclass(kw_only=True)
class HeartbeatRequest(BaseRequest):
    """Keep-alive request; adds no fields beyond the base request.

    ``command`` defaults to ``CommandType.HEARTBEAT``.
    """

    command: CommandType = CommandType.HEARTBEAT
112
+
113
+
114
@dataclass(kw_only=True)
class InterruptRequest(BaseRequest):
    """Request asking the server to interrupt current audio processing.

    ``command`` defaults to ``CommandType.INTERRUPT``.
    """

    command: CommandType = CommandType.INTERRUPT
119
+
120
+
121
@dataclass(kw_only=True)
class CheckInitStatusRequest(BaseRequest):
    """Request asking the server for its initialization status.

    ``command`` defaults to ``CommandType.CHECK_INIT_STATUS``.
    """

    command: CommandType = CommandType.CHECK_INIT_STATUS
126
+
127
+
128
@dataclass(kw_only=True)
class GetSettingRequest(BaseRequest):
    """Request to get the current settings.

    ``command`` defaults to ``CommandType.GET_SETTING``; ``name`` is
    required (presumably the name of the setting to read - confirm against
    the server handler).
    """

    command: CommandType = CommandType.GET_SETTING
    # A required field may follow a defaulted one only because the dataclass
    # is keyword-only.
    name: str
134
+
135
+
136
@dataclass
class ServerResponse:
    """Generic server response.

    ``status`` is required; ``message`` and ``extra`` default to ``None``.
    """

    status: ResponseStatus
    message: Optional[str] = None
    extra: Optional[dict] = None

    @classmethod
    def from_dict(cls, response_dict: dict) -> "ServerResponse":
        """Create a response from a dictionary.

        Args:
            response_dict: Mapping with a required ``"status"`` entry and
                optional ``"message"`` and ``"extra"`` entries.

        Returns:
            An instance of the class the method was called on.

        Raises:
            KeyError: If ``"status"`` is missing.
            ValueError: If the status value is not a valid ``ResponseStatus``.
        """
        # Build through ``cls`` so subclasses get an instance of their own type.
        return cls(
            status=ResponseStatus(response_dict["status"]),
            message=response_dict.get("message"),
            extra=response_dict.get("extra"),
        )

    def to_dict(self) -> dict:
        """Convert the response to dictionary format via ``dataclasses.asdict``."""
        return asdict(self)
156
+
157
+
158
@dataclass
class FrameMessage:
    """Frame data sent from server to client."""

    client_id: str
    # JPEG encoded image data; None when ``create`` was given no image.
    frame_data: Optional[bytes]
    frame_index: Optional[int]
    source_message_id: Optional[str]  # ``create`` defaults this to None
    end_of_speech: bool  # mark the end of the speech
    audio_bytes: Optional[bytes] = None  # Audio chunk data
    sample_rate: Optional[int] = None  # Audio sample rate
    metadata: dict = field(default_factory=dict)  # For additional frame info

    def to_dict(self) -> dict:
        """Convert the message to dictionary format via ``dataclasses.asdict``."""
        return asdict(self)

    @classmethod
    def create(
        cls,
        client_id: str,
        frame_image: Optional[np.ndarray],
        frame_index: Optional[int],
        end_of_speech: bool,
        audio_bytes: Optional[bytes] = None,
        sample_rate: Optional[int] = None,
        source_message_id: Optional[str] = None,
        **kwargs: Any,
    ) -> "FrameMessage":
        """Create a frame message from frame data.

        Args:
            client_id: Identifier stored on the message.
            frame_image: Image to pass through ``encode_image``; ``None`` is
                stored as-is without encoding.
            frame_index: Index stored on the message, or ``None``.
            end_of_speech: Flag stored on the message.
            audio_bytes: Optional audio payload.
            sample_rate: Optional sample rate for ``audio_bytes``.
            source_message_id: Optional originating message id.
            **kwargs: Extra keyword arguments, stored as ``metadata``.

        Returns:
            An instance of the class the method was called on.
        """
        frame_data = encode_image(frame_image) if frame_image is not None else None

        # Build through ``cls`` so subclasses get an instance of their own type.
        return cls(
            client_id=client_id,
            frame_data=frame_data,
            frame_index=frame_index,
            source_message_id=source_message_id,
            end_of_speech=end_of_speech,
            audio_bytes=audio_bytes,
            sample_rate=sample_rate,
            metadata=kwargs,
        )

    @cached_property
    def image(self) -> np.ndarray:
        """Get the image as a numpy array.

        The result of ``decode_image(self.frame_data)`` is computed on first
        access and cached on the instance.
        """
        return decode_image(self.frame_data)

    @property
    def has_audio(self) -> bool:
        """Check if frame has valid audio data.

        True only when both ``audio_bytes`` and ``sample_rate`` are truthy.
        """
        return bool(self.audio_bytes and self.sample_rate)