nersemble-benchmark 0.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nersemble_benchmark-0.0.12/PKG-INFO +323 -0
- nersemble_benchmark-0.0.12/README.md +301 -0
- nersemble_benchmark-0.0.12/pyproject.toml +44 -0
- nersemble_benchmark-0.0.12/setup.cfg +4 -0
- nersemble_benchmark-0.0.12/setup.py +7 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/__init__.py +0 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/constants.py +99 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/data/__init__.py +0 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/data/benchmark_data.py +187 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/data/submission_data.py +264 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/env.py +16 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/models/__init__.py +0 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/models/flame.py +105 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/scripts/__init__.py +0 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/scripts/download_data.py +184 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/util/__init__.py +0 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/util/download.py +33 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/util/metadata.py +61 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/util/security.py +41 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark/util/video.py +36 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark.egg-info/PKG-INFO +323 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark.egg-info/SOURCES.txt +28 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark.egg-info/dependency_links.txt +1 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark.egg-info/entry_points.txt +2 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark.egg-info/requires.txt +11 -0
- nersemble_benchmark-0.0.12/src/nersemble_benchmark.egg-info/top_level.txt +1 -0
- nersemble_benchmark-0.0.12/test/test_mono_avatar_data_manager.py +107 -0
- nersemble_benchmark-0.0.12/test/test_nvs_data_manager.py +24 -0
- nersemble_benchmark-0.0.12/test/test_readme.py +32 -0
- nersemble_benchmark-0.0.12/test/test_submission_data.py +52 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: nersemble_benchmark
|
|
3
|
+
Version: 0.0.12
|
|
4
|
+
Summary: Official devkit for the NeRSemble Photorealistic 3D Head Avatar Benchmark
|
|
5
|
+
Author-email: Tobias Kirschstein <tobias.kirschstein@gmail.com>
|
|
6
|
+
License: Apache 2.0
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.8.0
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: tyro
|
|
13
|
+
Requires-Dist: environs
|
|
14
|
+
Requires-Dist: elias
|
|
15
|
+
Requires-Dist: dreifus
|
|
16
|
+
Requires-Dist: tqdm
|
|
17
|
+
Requires-Dist: open3d
|
|
18
|
+
Requires-Dist: flame-model
|
|
19
|
+
Requires-Dist: mediapy
|
|
20
|
+
Requires-Dist: imageio[pyav]
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
|
|
23
|
+
# NeRSemble Photorealistic 3D Head Avatar Benchmark
|
|
24
|
+
|
|
25
|
+
This is the official NeRSemble Benchmark Toolkit for downloading the data and preparing submissions for the NeRSemble Photorealistic 3D Head Avatar benchmarks.
|
|
26
|
+
For submitting your results, please go to our [submission system](https://kaldir.vc.in.tum.de/nersemble_benchmark/).
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
## 1. Data Access & Setup
|
|
30
|
+
|
|
31
|
+
1. Request access to the NeRSemble dataset (only necessary if you did not request access previously): https://forms.gle/rYRoGNh2ed51TDWX9
|
|
32
|
+
2. Once approved, you will receive a mail with the download link in the form of
|
|
33
|
+
```python
|
|
34
|
+
NERSEMBLE_BENCHMARK_URL = "..."
|
|
35
|
+
```
|
|
36
|
+
3. Create a file at `~/.config/nersemble_benchmark/.env` with following content:
|
|
37
|
+
```python
|
|
38
|
+
NERSEMBLE_BENCHMARK_URL = "<<<URL YOU GOT WHEN REQUESTING ACCESS TO NERSEMBLE>>>"
|
|
39
|
+
```
|
|
40
|
+
4. Install this repository via
|
|
41
|
+
```pip install nersemble_benchmark```
|
|
42
|
+
|
|
43
|
+
## 2. Data Download
|
|
44
|
+
|
|
45
|
+
After installation of the benchmark repository, a `nersemble-benchmark-download` command will be available in your environment.
|
|
46
|
+
This is the main tool to download the benchmark data. To get a detailed description of download options, run `nersemble-benchmark-download --help`.
|
|
47
|
+
In the following, `${benchmark_folder}` denotes the path to your local folder where the benchmark data should be downloaded to.
|
|
48
|
+
|
|
49
|
+
### Overview
|
|
50
|
+
|
|
51
|
+
#### NVS Benchmark (1604 x 1100)
|
|
52
|
+
|
|
53
|
+
| Participant ID | Sequence | #Frames | Size | Size (incl. pointclouds) |
|
|
54
|
+
|----------------|----------------|----------|-------------|--------------------------|
|
|
55
|
+
| 388 | GLASSES | 1118 | 1.06 GB | 21.8 GB |
|
|
56
|
+
| 422 | EXP-2-eyes | 517 | 386 MB | 16.1 GB |
|
|
57
|
+
| 443 | FREE | 1108 | 1.19 GB | 17.3 GB |
|
|
58
|
+
| 445 | EXP-6-tongue-1 | 514 | 401 MB | 13.4 GB |
|
|
59
|
+
| 475 | HAIR | 259 | 325 MB | 773 MB |
|
|
60
|
+
| | | Σ = 3516 | Σ = 3.34 GB | Σ = 69.6 GB |
|
|
61
|
+
|
|
62
|
+
13 of the 16 available cameras are provided for training; the remaining 3 cameras (`222200046`, `222200037`, `222200039`) are held out and used to compute the test metrics.
|
|
63
|
+
|
|
64
|
+
#### Mono FLAME Avatar Benchmark (512 x 512)
|
|
65
|
+
| Participant ID | #Sequences (train / test) | #Frames (train / test) | Size |
|
|
66
|
+
|----------------|---------------------------|------------------------|------------|
|
|
67
|
+
| 393 | 18 / 4 | 2,964 / 816 | 27 MB |
|
|
68
|
+
| 404 | 18 / 4 | 2,009 / 665 | 28 MB |
|
|
69
|
+
| 461 | 18 / 4 | 2,057 / 486 | 29 MB |
|
|
70
|
+
| 477 | 18 / 4 | 2,543 / 530 | 37 MB |
|
|
71
|
+
| 486 | 18 / 4 | 2,440 / 608 | 23 MB |
|
|
72
|
+
| | | 12,013 / 3,105 | Σ = 144 MB |
|
|
73
|
+
|
|
74
|
+
Only a single camera is provided for training: `222200037`.
|
|
75
|
+
For all participants, the same 4 sequences are held out: `EMO-1-shout+laugh`, `FREE`, `SEN-09-frown_events_bad`, and `SEN-10-port_strong_smokey`.
|
|
76
|
+
To compute the test metrics, both the training camera and the 3 hold-out cameras (`222200046`, `220700191`, `222200039`) are used.
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
### NVS Benchmark download
|
|
80
|
+
|
|
81
|
+
```shell
|
|
82
|
+
nersemble-benchmark-download ${benchmark_folder} nvs
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
#### NVS pointclouds
|
|
86
|
+
The NVS benchmark also comes with pointclouds for each timestep that can be used to solve the task.
|
|
87
|
+
Due to their size, per default only the first pointcloud of each sequence is downloaded which can be helpful to initialize 3D Gaussians for example.
|
|
88
|
+
To download the pointclouds for all frames of the benchmark sequences, use `--pointcloud_frames all`. The pointclouds contain 3D point positions, colors, and normals.
|
|
89
|
+
|
|
90
|
+
### Mono FLAME Avatar Benchmark download
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
nersemble-benchmark-download ${benchmark_folder} mono_flame_avatar
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
#### FLAME tracking
|
|
97
|
+
|
|
98
|
+
The Mono FLAME Avatar benchmark comes with FLAME tracking for each timestep of both the train sequences and the hold-out sequences.
|
|
99
|
+
These are downloaded per default, but can also be specifically targeted for download via `--assets flame2023_tracking`.
|
|
100
|
+
|
|
101
|
+
## 3. Usage
|
|
102
|
+
|
|
103
|
+
### Data Managers
|
|
104
|
+
|
|
105
|
+
The benchmark repository provides data managers to simplify loading individual assets such as images in Python code.
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from nersemble_benchmark.data.benchmark_data import NVSDataManager
|
|
109
|
+
from nersemble_benchmark.constants import BENCHMARK_NVS_IDS_AND_SEQUENCES, BENCHMARK_NVS_TRAIN_SERIALS
|
|
110
|
+
|
|
111
|
+
benchmark_folder = "path/to/local/benchmark/folder"
|
|
112
|
+
participant_id, sequence_name = BENCHMARK_NVS_IDS_AND_SEQUENCES[0] # <- Use first benchmark subject
|
|
113
|
+
serial = BENCHMARK_NVS_TRAIN_SERIALS[0] # <- Use first train camera
|
|
114
|
+
timestep = 0 # <- Use first timestep
|
|
115
|
+
|
|
116
|
+
data_manager = NVSDataManager(benchmark_folder, participant_id)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### Load image
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
image = data_manager.load_image(sequence_name, serial, timestep) # <- Load first frame. Background is already removed
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
<img src="static/images/example_image.jpg" width="150px" alt="Loaded example image"/>
|
|
126
|
+
|
|
127
|
+
#### Load Alpha Map
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
image = data_manager.load_alpha_map(sequence_name, serial, timestep) # <- Load alpha map
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
<img src="static/images/example_alpha_map.jpg" width="150px" alt="Loaded example alpha map"/>
|
|
134
|
+
|
|
135
|
+
#### Load cameras
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
camera_params = data_manager.load_camera_calibration()
|
|
139
|
+
world_2_cam_pose = camera_params.world_2_cam[serial] # <- 4x4 world2cam extrinsic matrix in OpenCV camera coordinate convention
|
|
140
|
+
intrinsics = camera_params.intrinsics[serial] # <- 3x3 intrinsic matrix
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Furthermore, the [visualize_cameras.py](scripts/visualize/visualize_cameras.py) script shows the arrangement of the cameras in 3D. The hold-out cameras used for
|
|
144
|
+
the hidden test set are shown in red. The `388` indicates the ID of the participant (see the data section for available participant IDs in the benchmark)
|
|
145
|
+
|
|
146
|
+
```shell
|
|
147
|
+
python scripts/visualize/visualize_cameras.py ${benchmark_folder} 388
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
<img src="static/images/example_cameras.jpg" width="300px" alt="Loaded example cameras"/>
|
|
151
|
+
|
|
152
|
+
### NVS Data Manager assets
|
|
153
|
+
The dynamic NVS benchmark has some assets specific to the benchmark. The following code assumes the use of a `NVSDataManager`:
|
|
154
|
+
```python
|
|
155
|
+
from nersemble_benchmark.data.benchmark_data import NVSDataManager
|
|
156
|
+
nvs_data_manager = NVSDataManager(benchmark_folder, participant_id)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
#### Load Pointcloud
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
points, colors, normals = nvs_data_manager.load_pointcloud(sequence_name, timestep) # <- Load pointcloud of some timestep
|
|
163
|
+
```
|
|
164
|
+
<img src="static/images/example_pointcloud.jpg" width="150px" alt="Loaded example pointcloud"/>
|
|
165
|
+
|
|
166
|
+
### Mono FLAME Avatar assets
|
|
167
|
+
The Mono FLAME Avatar benchmark has some additional assets specific to the benchmark. The following code assumes the use of a `MonoFlameAvatarDataManager`:
|
|
168
|
+
```python
|
|
169
|
+
from nersemble_benchmark.data.benchmark_data import MonoFlameAvatarDataManager
|
|
170
|
+
mono_flame_data_manager = MonoFlameAvatarDataManager(benchmark_folder, participant_id)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
#### FLAME tracking
|
|
174
|
+
|
|
175
|
+
The FLAME tracking for the benchmark has been conducted with the FLAME 2023 model.
|
|
176
|
+
The tracking result can be loaded via:
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
flame_tracking = mono_flame_data_manager.load_flame_tracking(sequence_name) # <- Load the FLAME tracking for an entire sequence
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
It contains shape and expression codes, jaw and eye parameters, as well as rigid head rotation and translation in world space:
|
|
183
|
+
```python
|
|
184
|
+
class FlameTracking:
|
|
185
|
+
shape # (1, 300)
|
|
186
|
+
expression # (T, 100)
|
|
187
|
+
rotation # (T, 3)
|
|
188
|
+
rotation_matrices # (T, 3, 3)
|
|
189
|
+
translation # (T, 3)
|
|
190
|
+
jaw # (T, 3)
|
|
191
|
+
frames # (T,)
|
|
192
|
+
scale # (1, 1)
|
|
193
|
+
neck # (T, 3)
|
|
194
|
+
eyes # (T, 6)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
The FLAME tracking will provide a FLAME mesh that is already perfectly aligned with the given cameras from the benchmark.
|
|
198
|
+
The easiest way to obtain the mesh from the tracking parameters is using the `FlameProvider` class:
|
|
199
|
+
```python
|
|
200
|
+
from nersemble_benchmark.models.flame import FlameProvider
|
|
201
|
+
|
|
202
|
+
flame_provider = FlameProvider(flame_tracking)
|
|
203
|
+
mesh = flame_provider.get_mesh(timestep) # <- Get tracked mesh for the specified timestep in the sequence
|
|
204
|
+
```
|
|
205
|
+
The [visualize_flame_tracking.py](scripts/visualize/visualize_flame_tracking.py) script shows how to load the FLAME tracking and visualizes the corresponding FLAME mesh with the correct cameras:
|
|
206
|
+
```shell
|
|
207
|
+
python scripts/visualize/visualize_flame_tracking.py ${benchmark_folder} --participant_id 461
|
|
208
|
+
```
|
|
209
|
+
<img src="static/images/example_flame_tracking.jpg" width="300px" alt="FLAME Tracking example"/>
|
|
210
|
+
|
|
211
|
+
## 4. Submission
|
|
212
|
+
|
|
213
|
+
Submissions to the benchmark tasks are done by uploading a submission `.zip` file to our [submission system](https://kaldir.vc.in.tum.de/nersemble_benchmark/).
|
|
214
|
+
The following describes the expected format of a submission `.zip` file.
|
|
215
|
+
|
|
216
|
+
### 4.1. NVS Benchmark
|
|
217
|
+
|
|
218
|
+
#### Submission .zip creation
|
|
219
|
+
|
|
220
|
+
For each of the 5 benchmark sequences, you need to render the whole sequence from the three hold-out cameras (`222200046`, `222200037`, `222200039`).
|
|
221
|
+
The corresponding camera extrinsics and intrinsics can be loaded the same way as the train cameras:
|
|
222
|
+
```python
|
|
223
|
+
from nersemble_benchmark.constants import BENCHMARK_NVS_HOLD_OUT_SERIALS
|
|
224
|
+
|
|
225
|
+
camera_params = data_manager.load_camera_calibration()
|
|
226
|
+
for serial in BENCHMARK_NVS_HOLD_OUT_SERIALS:
|
|
227
|
+
world_2_cam_pose = camera_params.world_2_cam[serial]
|
|
228
|
+
intrinsics = camera_params.intrinsics[serial]
|
|
229
|
+
... # <- Render video from your reconstructed 4D representation
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Once you have rendered the images from the hold-out viewpoints for all frames of the 5 benchmark sequences, you can pack them into a `.zip` file for submission.
|
|
233
|
+
The expected structure of the `.zip` file is as follows:
|
|
234
|
+
```yaml
|
|
235
|
+
nvs_submission.zip
|
|
236
|
+
├── 388
|
|
237
|
+
│ └── GLASSES
|
|
238
|
+
│ ├── cam_222200037.mp4 # <- Video predictions from your method
|
|
239
|
+
│ ├── cam_222200039.mp4
|
|
240
|
+
│ └── cam_222200046.mp4
|
|
241
|
+
├── 422
|
|
242
|
+
│ └── EXP-2-eyes
|
|
243
|
+
│ ├── cam_222200037.mp4
|
|
244
|
+
│ ├── cam_222200039.mp4
|
|
245
|
+
│ └── cam_222200046.mp4
|
|
246
|
+
┆
|
|
247
|
+
└── 475
|
|
248
|
+
└── ...
|
|
249
|
+
```
|
|
250
|
+
Since `.mp4` is a lossy compression format, we use a very high quality setting of `--crf 14` to ensure the metric calculation is not affected by compression artifacts.
|
|
251
|
+
|
|
252
|
+
To facilitate the creation of the submission .zip, this repository also contains some Python helpers that you can use:
|
|
253
|
+
```python
|
|
254
|
+
from nersemble_benchmark.data.submission_data import NVSSubmissionDataWriter
|
|
255
|
+
|
|
256
|
+
zip_path = ... # <- Local path where you want to create your submission .zip file
|
|
257
|
+
images = ... # <- List of uint8 numpy arrays (H, W, 3) in range 0-255 that hold the image data for all frames of a single camera
|
|
258
|
+
|
|
259
|
+
with NVSSubmissionDataWriter(zip_path) as submission_data_manager:
|
|
260
|
+
submission_data_manager.add_video(participant, sequence_name, serial, images) # <- will automatically package the images into a .mp4 file and place it correctly into the .zip
|
|
261
|
+
```
|
|
262
|
+
Note that the `NVSSubmissionDataWriter` will overwrite any previously existing `.zip` file with the same path. So, the predictions for all sequences and all hold-out cameras have to be added at once.
|
|
263
|
+
|
|
264
|
+
### 4.2. Monocular FLAME Avatar Benchmark
|
|
265
|
+
|
|
266
|
+
#### Submission .zip creation
|
|
267
|
+
|
|
268
|
+
For each of the 4 hold-out sequences of the 5 benchmark people, you need to render the whole sequence from the three hold-out cameras (`222200046`, `220700191`, `222200039`) as well as the train camera (`222200037`).
|
|
269
|
+
The corresponding camera extrinsics and intrinsics can be loaded the same way as the train cameras:
|
|
270
|
+
```python
|
|
271
|
+
from nersemble_benchmark.constants import BENCHMARK_MONO_FLAME_AVATAR_IDS, BENCHMARK_MONO_FLAME_AVATAR_TRAIN_SERIAL, BENCHMARK_MONO_FLAME_AVATAR_HOLD_OUT_SERIALS, BENCHMARK_MONO_FLAME_AVATAR_SEQUENCES_TEST
|
|
272
|
+
|
|
273
|
+
camera_params = data_manager.load_camera_calibration()
|
|
274
|
+
for participant_id in BENCHMARK_MONO_FLAME_AVATAR_IDS:
|
|
275
|
+
for sequence_name in BENCHMARK_MONO_FLAME_AVATAR_SEQUENCES_TEST:
|
|
276
|
+
flame_tracking = data_manager.load_flame_tracking(sequence_name)
|
|
277
|
+
flame_provider = FlameProvider(flame_tracking) # <- Use FLAME tracking to get expression codes / tracked meshes for hold-out sequence
|
|
278
|
+
# 3 hold-out cameras
|
|
279
|
+
for serial in BENCHMARK_MONO_FLAME_AVATAR_HOLD_OUT_SERIALS:
|
|
280
|
+
world_2_cam_pose = camera_params.world_2_cam[serial]
|
|
281
|
+
intrinsics = camera_params.intrinsics[serial]
|
|
282
|
+
... # <- Render video from your reconstructed 3D head avatar representation
|
|
283
|
+
|
|
284
|
+
# train viewpoint
|
|
285
|
+
serial = BENCHMARK_MONO_FLAME_AVATAR_TRAIN_SERIAL
|
|
286
|
+
world_2_cam_pose = camera_params.world_2_cam[serial]
|
|
287
|
+
intrinsics = camera_params.intrinsics[serial]
|
|
288
|
+
... # <- Render video from your reconstructed 3D head avatar representation
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Once you have rendered all frames from the 4 viewpoints for all 4 hold-out sequences of the 5 benchmark participants, you can pack them into a `.zip` file for submission.
|
|
292
|
+
The expected structure of the `.zip` file is as follows:
|
|
293
|
+
```yaml
|
|
294
|
+
mono_flame_avatar_submission.zip
|
|
295
|
+
├── 393
|
|
296
|
+
│ ├── EMO-1-shout+laugh
|
|
297
|
+
│ │ ├── cam_220700191.mp4 # <- Video predictions from your method
|
|
298
|
+
│ │ ├── cam_222200037.mp4
|
|
299
|
+
│ │ ├── cam_222200039.mp4
|
|
300
|
+
│ │ └── cam_222200046.mp4
|
|
301
|
+
│ ┆
|
|
302
|
+
│ └── SEN-10-port_strong_smokey
|
|
303
|
+
│ ├── cam_220700191.mp4
|
|
304
|
+
│ ├── cam_222200037.mp4
|
|
305
|
+
│ ├── cam_222200039.mp4
|
|
306
|
+
│ └── cam_222200046.mp4
|
|
307
|
+
┆
|
|
308
|
+
└── 486
|
|
309
|
+
└── ...
|
|
310
|
+
```
|
|
311
|
+
Since `.mp4` is a lossy compression format, we use a very high quality setting of `--crf 14` to ensure the metric calculation is not affected by compression artifacts.
|
|
312
|
+
|
|
313
|
+
To facilitate the creation of the submission .zip, this repository also contains some Python helpers that you can use:
|
|
314
|
+
```python
|
|
315
|
+
from nersemble_benchmark.data.submission_data import MonoFlameAvatarSubmissionDataWriter
|
|
316
|
+
|
|
317
|
+
zip_path = ... # <- Local path where you want to create your submission .zip file
|
|
318
|
+
images = ... # <- List of uint8 numpy arrays (H, W, 3) in range 0-255 that hold the image data for all frames of a single camera
|
|
319
|
+
|
|
320
|
+
with MonoFlameAvatarSubmissionDataWriter(zip_path) as submission_data_manager:
|
|
321
|
+
submission_data_manager.add_video(participant, sequence_name, serial, images) # <- will automatically package the images into a .mp4 file and place it correctly into the .zip
|
|
322
|
+
```
|
|
323
|
+
Note that the `MonoFlameAvatarSubmissionDataWriter` will overwrite any previously existing `.zip` file with the same path. So, the predictions for all sequences and all hold-out cameras have to be added at once.
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# NeRSemble Photorealistic 3D Head Avatar Benchmark
|
|
2
|
+
|
|
3
|
+
This is the official NeRSemble Benchmark Toolkit for downloading the data and preparing submissions for the NeRSemble Photorealistic 3D Head Avatar benchmarks.
|
|
4
|
+
For submitting your results, please go to our [submission system](https://kaldir.vc.in.tum.de/nersemble_benchmark/).
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## 1. Data Access & Setup
|
|
8
|
+
|
|
9
|
+
1. Request access to the NeRSemble dataset (only necessary if you did not request access previously): https://forms.gle/rYRoGNh2ed51TDWX9
|
|
10
|
+
2. Once approved, you will receive a mail with the download link in the form of
|
|
11
|
+
```python
|
|
12
|
+
NERSEMBLE_BENCHMARK_URL = "..."
|
|
13
|
+
```
|
|
14
|
+
3. Create a file at `~/.config/nersemble_benchmark/.env` with following content:
|
|
15
|
+
```python
|
|
16
|
+
NERSEMBLE_BENCHMARK_URL = "<<<URL YOU GOT WHEN REQUESTING ACCESS TO NERSEMBLE>>>"
|
|
17
|
+
```
|
|
18
|
+
4. Install this repository via
|
|
19
|
+
```pip install nersemble_benchmark```
|
|
20
|
+
|
|
21
|
+
## 2. Data Download
|
|
22
|
+
|
|
23
|
+
After installation of the benchmark repository, a `nersemble-benchmark-download` command will be available in your environment.
|
|
24
|
+
This is the main tool to download the benchmark data. To get a detailed description of download options, run `nersemble-benchmark-download --help`.
|
|
25
|
+
In the following, `${benchmark_folder}` denotes the path to your local folder where the benchmark data should be downloaded to.
|
|
26
|
+
|
|
27
|
+
### Overview
|
|
28
|
+
|
|
29
|
+
#### NVS Benchmark (1604 x 1100)
|
|
30
|
+
|
|
31
|
+
| Participant ID | Sequence | #Frames | Size | Size (incl. pointclouds) |
|
|
32
|
+
|----------------|----------------|----------|-------------|--------------------------|
|
|
33
|
+
| 388 | GLASSES | 1118 | 1.06 GB | 21.8 GB |
|
|
34
|
+
| 422 | EXP-2-eyes | 517 | 386 MB | 16.1 GB |
|
|
35
|
+
| 443 | FREE | 1108 | 1.19 GB | 17.3 GB |
|
|
36
|
+
| 445 | EXP-6-tongue-1 | 514 | 401 MB | 13.4 GB |
|
|
37
|
+
| 475 | HAIR | 259 | 325 MB | 773 MB |
|
|
38
|
+
| | | Σ = 3516 | Σ = 3.34 GB | Σ = 69.6 GB |
|
|
39
|
+
|
|
40
|
+
13 of the 16 available cameras are provided for training; the remaining 3 cameras (`222200046`, `222200037`, `222200039`) are held out and used to compute the test metrics.
|
|
41
|
+
|
|
42
|
+
#### Mono FLAME Avatar Benchmark (512 x 512)
|
|
43
|
+
| Participant ID | #Sequences (train / test) | #Frames (train / test) | Size |
|
|
44
|
+
|----------------|---------------------------|------------------------|------------|
|
|
45
|
+
| 393 | 18 / 4 | 2,964 / 816 | 27 MB |
|
|
46
|
+
| 404 | 18 / 4 | 2,009 / 665 | 28 MB |
|
|
47
|
+
| 461 | 18 / 4 | 2,057 / 486 | 29 MB |
|
|
48
|
+
| 477 | 18 / 4 | 2,543 / 530 | 37 MB |
|
|
49
|
+
| 486 | 18 / 4 | 2,440 / 608 | 23 MB |
|
|
50
|
+
| | | 12,013 / 3,105 | Σ = 144 MB |
|
|
51
|
+
|
|
52
|
+
Only a single camera is provided for training: `222200037`.
|
|
53
|
+
For all participants, the same 4 sequences are held out: `EMO-1-shout+laugh`, `FREE`, `SEN-09-frown_events_bad`, and `SEN-10-port_strong_smokey`.
|
|
54
|
+
To compute the test metrics, both the training camera and the 3 hold-out cameras (`222200046`, `220700191`, `222200039`) are used.
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
### NVS Benchmark download
|
|
58
|
+
|
|
59
|
+
```shell
|
|
60
|
+
nersemble-benchmark-download ${benchmark_folder} nvs
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
#### NVS pointclouds
|
|
64
|
+
The NVS benchmark also comes with pointclouds for each timestep that can be used to solve the task.
|
|
65
|
+
Due to their size, per default only the first pointcloud of each sequence is downloaded which can be helpful to initialize 3D Gaussians for example.
|
|
66
|
+
To download the pointclouds for all frames of the benchmark sequences, use `--pointcloud_frames all`. The pointclouds contain 3D point positions, colors, and normals.
|
|
67
|
+
|
|
68
|
+
### Mono FLAME Avatar Benchmark download
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
nersemble-benchmark-download ${benchmark_folder} mono_flame_avatar
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
#### FLAME tracking
|
|
75
|
+
|
|
76
|
+
The Mono FLAME Avatar benchmark comes with FLAME tracking for each timestep of both the train sequences and the hold-out sequences.
|
|
77
|
+
These are downloaded per default, but can also be specifically targeted for download via `--assets flame2023_tracking`.
|
|
78
|
+
|
|
79
|
+
## 3. Usage
|
|
80
|
+
|
|
81
|
+
### Data Managers
|
|
82
|
+
|
|
83
|
+
The benchmark repository provides data managers to simplify loading individual assets such as images in Python code.
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from nersemble_benchmark.data.benchmark_data import NVSDataManager
|
|
87
|
+
from nersemble_benchmark.constants import BENCHMARK_NVS_IDS_AND_SEQUENCES, BENCHMARK_NVS_TRAIN_SERIALS
|
|
88
|
+
|
|
89
|
+
benchmark_folder = "path/to/local/benchmark/folder"
|
|
90
|
+
participant_id, sequence_name = BENCHMARK_NVS_IDS_AND_SEQUENCES[0] # <- Use first benchmark subject
|
|
91
|
+
serial = BENCHMARK_NVS_TRAIN_SERIALS[0] # <- Use first train camera
|
|
92
|
+
timestep = 0 # <- Use first timestep
|
|
93
|
+
|
|
94
|
+
data_manager = NVSDataManager(benchmark_folder, participant_id)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
#### Load image
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
image = data_manager.load_image(sequence_name, serial, timestep) # <- Load first frame. Background is already removed
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
<img src="static/images/example_image.jpg" width="150px" alt="Loaded example image"/>
|
|
104
|
+
|
|
105
|
+
#### Load Alpha Map
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
image = data_manager.load_alpha_map(sequence_name, serial, timestep) # <- Load alpha map
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
<img src="static/images/example_alpha_map.jpg" width="150px" alt="Loaded example alpha map"/>
|
|
112
|
+
|
|
113
|
+
#### Load cameras
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
camera_params = data_manager.load_camera_calibration()
|
|
117
|
+
world_2_cam_pose = camera_params.world_2_cam[serial] # <- 4x4 world2cam extrinsic matrix in OpenCV camera coordinate convention
|
|
118
|
+
intrinsics = camera_params.intrinsics[serial] # <- 3x3 intrinsic matrix
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Furthermore, the [visualize_cameras.py](scripts/visualize/visualize_cameras.py) script shows the arrangement of the cameras in 3D. The hold-out cameras used for
|
|
122
|
+
the hidden test set are shown in red. The `388` indicates the ID of the participant (see the data section for available participant IDs in the benchmark)
|
|
123
|
+
|
|
124
|
+
```shell
|
|
125
|
+
python scripts/visualize/visualize_cameras.py ${benchmark_folder} 388
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
<img src="static/images/example_cameras.jpg" width="300px" alt="Loaded example cameras"/>
|
|
129
|
+
|
|
130
|
+
### NVS Data Manager assets
|
|
131
|
+
The dynamic NVS benchmark has some assets specific to the benchmark. The following code assumes the use of a `NVSDataManager`:
|
|
132
|
+
```python
|
|
133
|
+
from nersemble_benchmark.data.benchmark_data import NVSDataManager
|
|
134
|
+
nvs_data_manager = NVSDataManager(benchmark_folder, participant_id)
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### Load Pointcloud
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
points, colors, normals = nvs_data_manager.load_pointcloud(sequence_name, timestep) # <- Load pointcloud of some timestep
|
|
141
|
+
```
|
|
142
|
+
<img src="static/images/example_pointcloud.jpg" width="150px" alt="Loaded example pointcloud"/>
|
|
143
|
+
|
|
144
|
+
### Mono FLAME Avatar assets
|
|
145
|
+
The Mono FLAME Avatar benchmark has some additional assets specific to the benchmark. The following code assumes the use of a `MonoFlameAvatarDataManager`:
|
|
146
|
+
```python
|
|
147
|
+
from nersemble_benchmark.data.benchmark_data import MonoFlameAvatarDataManager
|
|
148
|
+
mono_flame_data_manager = MonoFlameAvatarDataManager(benchmark_folder, participant_id)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
#### FLAME tracking
|
|
152
|
+
|
|
153
|
+
The FLAME tracking for the benchmark has been conducted with the FLAME 2023 model.
|
|
154
|
+
The tracking result can be loaded via:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
flame_tracking = mono_flame_data_manager.load_flame_tracking(sequence_name) # <- Load the FLAME tracking for an entire sequence
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
It contains shape and expression codes, jaw and eye parameters, as well as rigid head rotation and translation in world space:
|
|
161
|
+
```python
|
|
162
|
+
class FlameTracking:
|
|
163
|
+
shape # (1, 300)
|
|
164
|
+
expression # (T, 100)
|
|
165
|
+
rotation # (T, 3)
|
|
166
|
+
rotation_matrices # (T, 3, 3)
|
|
167
|
+
translation # (T, 3)
|
|
168
|
+
jaw # (T, 3)
|
|
169
|
+
frames # (T,)
|
|
170
|
+
scale # (1, 1)
|
|
171
|
+
neck # (T, 3)
|
|
172
|
+
eyes # (T, 6)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The FLAME tracking will provide a FLAME mesh that is already perfectly aligned with the given cameras from the benchmark.
|
|
176
|
+
The easiest way to obtain the mesh from the tracking parameters is using the `FlameProvider` class:
|
|
177
|
+
```python
|
|
178
|
+
from nersemble_benchmark.models.flame import FlameProvider
|
|
179
|
+
|
|
180
|
+
flame_provider = FlameProvider(flame_tracking)
|
|
181
|
+
mesh = flame_provider.get_mesh(timestep) # <- Get tracked mesh for the specified timestep in the sequence
|
|
182
|
+
```
|
|
183
|
+
The [visualize_flame_tracking.py](scripts/visualize/visualize_flame_tracking.py) script shows how to load the FLAME tracking and visualizes the corresponding FLAME mesh with the correct cameras:
|
|
184
|
+
```shell
|
|
185
|
+
python scripts/visualize/visualize_flame_tracking.py ${benchmark_folder} --participant_id 461
|
|
186
|
+
```
|
|
187
|
+
<img src="static/images/example_flame_tracking.jpg" width="300px" alt="FLAME Tracking example"/>
|
|
188
|
+
|
|
189
|
+
## 4. Submission
|
|
190
|
+
|
|
191
|
+
Submissions to the benchmark tasks are done by uploading a submission `.zip` file to our [submission system](https://kaldir.vc.in.tum.de/nersemble_benchmark/).
|
|
192
|
+
The following describes the expected format of a submission `.zip` file.
|
|
193
|
+
|
|
194
|
+
### 4.1. NVS Benchmark
|
|
195
|
+
|
|
196
|
+
#### Submission .zip creation
|
|
197
|
+
|
|
198
|
+
For each of the 5 benchmark sequences, you need to render the whole sequence from the three hold-out cameras (`222200046`, `222200037`, `222200039`).
|
|
199
|
+
The corresponding camera extrinsics and intrinsics can be loaded the same way as the train cameras:
|
|
200
|
+
```python
|
|
201
|
+
from nersemble_benchmark.constants import BENCHMARK_NVS_HOLD_OUT_SERIALS
|
|
202
|
+
|
|
203
|
+
camera_params = data_manager.load_camera_calibration()
|
|
204
|
+
for serial in BENCHMARK_NVS_HOLD_OUT_SERIALS:
|
|
205
|
+
world_2_cam_pose = camera_params.world_2_cam[serial]
|
|
206
|
+
intrinsics = camera_params.intrinsics[serial]
|
|
207
|
+
... # <- Render video from your reconstructed 4D representation
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Once you have rendered the images from the hold-out viewpoints for all frames of the 5 benchmark sequences, you can pack them into a `.zip` file for submission.
|
|
211
|
+
The expected structure of the `.zip` file is as follows:
|
|
212
|
+
```yaml
|
|
213
|
+
nvs_submission.zip
|
|
214
|
+
├── 388
|
|
215
|
+
│ └── GLASSES
|
|
216
|
+
│ ├── cam_222200037.mp4 # <- Video predictions from your method
|
|
217
|
+
│ ├── cam_222200039.mp4
|
|
218
|
+
│ └── cam_222200046.mp4
|
|
219
|
+
├── 422
|
|
220
|
+
│ └── EXP-2-eyes
|
|
221
|
+
│ ├── cam_222200037.mp4
|
|
222
|
+
│ ├── cam_222200039.mp4
|
|
223
|
+
│ └── cam_222200046.mp4
|
|
224
|
+
┆
|
|
225
|
+
└── 475
|
|
226
|
+
└── ...
|
|
227
|
+
```
|
|
228
|
+
Since `.mp4` is a lossy compression format, we use a very high quality setting of `--crf 14` to ensure the metric calculation is not affected by compression artifacts.
|
|
229
|
+
|
|
230
|
+
To facilitate the creation of the submission `.zip`, this repository also contains some Python helpers that you can use:
|
|
231
|
+
```python
|
|
232
|
+
from nersemble_benchmark.data.submission_data import NVSSubmissionDataWriter
|
|
233
|
+
|
|
234
|
+
zip_path = ... # <- Local path where you want to create your submission .zip file
|
|
235
|
+
images = ... # <- List of uint8 numpy arrays (H, W, 3) in range 0-255 that hold the image data for all frames of a single camera
|
|
236
|
+
|
|
237
|
+
with NVSSubmissionDataWriter(zip_path) as submission_data_manager:
|
|
238
|
+
submission_data_manager.add_video(participant, sequence_name, serial, images) # <- will automatically package the images into a .mp4 file and place it correctly into the .zip
|
|
239
|
+
```
|
|
240
|
+
Note that the `NVSSubmissionDataWriter` will overwrite any previously existing `.zip` file with the same path. So, the predictions for all sequences and all hold-out cameras have to be added at once.
|
|
241
|
+
|
|
242
|
+
### 4.2. Monocular FLAME Avatar Benchmark
|
|
243
|
+
|
|
244
|
+
#### Submission .zip creation
|
|
245
|
+
|
|
246
|
+
For each of the 4 hold-out sequences of the 5 benchmark people, you need to render the whole sequence from the three hold-out cameras (`222200046`, `220700191`, `222200039`) as well as the train camera (`222200037`).
|
|
247
|
+
The corresponding camera extrinsics and intrinsics can be loaded the same way as the train cameras:
|
|
248
|
+
```python
|
|
249
|
+
from nersemble_benchmark.constants import BENCHMARK_MONO_FLAME_AVATAR_IDS, BENCHMARK_MONO_FLAME_AVATAR_TRAIN_SERIAL, BENCHMARK_MONO_FLAME_AVATAR_HOLD_OUT_SERIALS, BENCHMARK_MONO_FLAME_AVATAR_SEQUENCES_TEST
|
|
250
|
+
|
|
251
|
+
camera_params = data_manager.load_camera_calibration()
|
|
252
|
+
for participant_id in BENCHMARK_MONO_FLAME_AVATAR_IDS:
|
|
253
|
+
for sequence_name in BENCHMARK_MONO_FLAME_AVATAR_SEQUENCES_TEST:
|
|
254
|
+
flame_tracking = data_manager.load_flame_tracking(sequence_name)
|
|
255
|
+
flame_provider = FlameProvider(flame_tracking) # <- Use FLAME tracking to get expression codes / tracked meshes for hold-out sequence
|
|
256
|
+
# 3 hold-out cameras
|
|
257
|
+
for serial in BENCHMARK_MONO_FLAME_AVATAR_HOLD_OUT_SERIALS:
|
|
258
|
+
world_2_cam_pose = camera_params.world_2_cam[serial]
|
|
259
|
+
intrinsics = camera_params.intrinsics[serial]
|
|
260
|
+
... # <- Render video from your reconstructed 3D head avatar representation
|
|
261
|
+
|
|
262
|
+
# train viewpoint
|
|
263
|
+
serial = BENCHMARK_MONO_FLAME_AVATAR_TRAIN_SERIAL
|
|
264
|
+
world_2_cam_pose = camera_params.world_2_cam[serial]
|
|
265
|
+
intrinsics = camera_params.intrinsics[serial]
|
|
266
|
+
... # <- Render video from your reconstructed 3D head avatar representation
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Once you have rendered all frames from the 4 viewpoints for all 4 hold-out sequences of the 5 benchmark people, you can pack them into a `.zip` file for submission.
|
|
270
|
+
The expected structure of the `.zip` file is as follows:
|
|
271
|
+
```yaml
|
|
272
|
+
mono_flame_avatar_submission.zip
|
|
273
|
+
├── 393
|
|
274
|
+
│ ├── EMO-1-shout+laugh
|
|
275
|
+
│ │ ├── cam_220700191.mp4 # <- Video predictions from your method
|
|
276
|
+
│ │ ├── cam_222200037.mp4
|
|
277
|
+
│ │ ├── cam_222200039.mp4
|
|
278
|
+
│ │ └── cam_222200046.mp4
|
|
279
|
+
│ ┆
|
|
280
|
+
│ └── SEN-10-port_strong_smokey
|
|
281
|
+
│ ├── cam_220700191.mp4
|
|
282
|
+
│ ├── cam_222200037.mp4
|
|
283
|
+
│ ├── cam_222200039.mp4
|
|
284
|
+
│ └── cam_222200046.mp4
|
|
285
|
+
┆
|
|
286
|
+
└── 486
|
|
287
|
+
└── ...
|
|
288
|
+
```
|
|
289
|
+
Since `.mp4` is a lossy compression format, we use a very high quality setting of `--crf 14` to ensure the metric calculation is not affected by compression artifacts.
|
|
290
|
+
|
|
291
|
+
To facilitate the creation of the submission `.zip`, this repository also contains some Python helpers that you can use:
|
|
292
|
+
```python
|
|
293
|
+
from nersemble_benchmark.data.submission_data import MonoFlameAvatarSubmissionDataWriter
|
|
294
|
+
|
|
295
|
+
zip_path = ... # <- Local path where you want to create your submission .zip file
|
|
296
|
+
images = ... # <- List of uint8 numpy arrays (H, W, 3) in range 0-255 that hold the image data for all frames of a single camera
|
|
297
|
+
|
|
298
|
+
with MonoFlameAvatarSubmissionDataWriter(zip_path) as submission_data_manager:
|
|
299
|
+
submission_data_manager.add_video(participant, sequence_name, serial, images) # <- will automatically package the images into a .mp4 file and place it correctly into the .zip
|
|
300
|
+
```
|
|
301
|
+
Note that the `MonoFlameAvatarSubmissionDataWriter` will overwrite any previously existing `.zip` file with the same path. So, the predictions for all sequences and all hold-out cameras have to be added at once.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "nersemble_benchmark"
|
|
7
|
+
version = "0.0.12"
|
|
8
|
+
description = "Official devkit for the NeRSemble Photorealistic 3D Head Avatar Benchmark"
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "Tobias Kirschstein", email = "tobias.kirschstein@gmail.com" },
|
|
11
|
+
]
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
license = { text = "Apache 2.0" }
|
|
14
|
+
requires-python = ">=3.8.0"
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Operating System :: OS Independent"
|
|
19
|
+
]
|
|
20
|
+
# urls = { Documentation = "<<<ENTER_LINK_TO_DOCUMENTATION>>>" }
|
|
21
|
+
# Main dependencies
|
|
22
|
+
dependencies = [
|
|
23
|
+
"tyro",
|
|
24
|
+
"environs",
|
|
25
|
+
"elias",
|
|
26
|
+
"dreifus",
|
|
27
|
+
"tqdm",
|
|
28
|
+
"open3d",
|
|
29
|
+
"flame-model",
|
|
30
|
+
"mediapy",
|
|
31
|
+
"imageio[pyav]"
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
# Development packages, install via nersemble_benchmark[dev]
|
|
36
|
+
dev = [
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.scripts]
|
|
40
|
+
nersemble-benchmark-download = "nersemble_benchmark.scripts.download_data:main_cli"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["src"]
|
|
44
|
+
include = ["nersemble_benchmark*"] # Keep the '*', otherwise submodules are not found
|