gsvvcompressor-1.2.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsvvcompressor/__init__.py +13 -0
- gsvvcompressor/__main__.py +243 -0
- gsvvcompressor/combinations/__init__.py +84 -0
- gsvvcompressor/combinations/registry.py +52 -0
- gsvvcompressor/combinations/vq_xyz_1mask.py +89 -0
- gsvvcompressor/combinations/vq_xyz_1mask_zstd.py +103 -0
- gsvvcompressor/combinations/vq_xyz_draco.py +468 -0
- gsvvcompressor/combinations/vq_xyz_draco_2pass.py +156 -0
- gsvvcompressor/combinations/vq_xyz_zstd.py +106 -0
- gsvvcompressor/compress/__init__.py +5 -0
- gsvvcompressor/compress/zstd.py +144 -0
- gsvvcompressor/decoder.py +155 -0
- gsvvcompressor/deserializer.py +42 -0
- gsvvcompressor/draco/__init__.py +34 -0
- gsvvcompressor/draco/draco_decoder.exe +0 -0
- gsvvcompressor/draco/draco_encoder.exe +0 -0
- gsvvcompressor/draco/dracoreduced3dgs.cp310-win_amd64.pyd +0 -0
- gsvvcompressor/draco/interface.py +339 -0
- gsvvcompressor/draco/serialize.py +235 -0
- gsvvcompressor/draco/twopass.py +359 -0
- gsvvcompressor/encoder.py +122 -0
- gsvvcompressor/interframe/__init__.py +11 -0
- gsvvcompressor/interframe/combine.py +271 -0
- gsvvcompressor/interframe/decoder.py +99 -0
- gsvvcompressor/interframe/encoder.py +92 -0
- gsvvcompressor/interframe/interface.py +221 -0
- gsvvcompressor/interframe/twopass.py +226 -0
- gsvvcompressor/io/__init__.py +31 -0
- gsvvcompressor/io/bytes.py +103 -0
- gsvvcompressor/io/config.py +78 -0
- gsvvcompressor/io/gaussian_model.py +127 -0
- gsvvcompressor/movecameras.py +33 -0
- gsvvcompressor/payload.py +34 -0
- gsvvcompressor/serializer.py +42 -0
- gsvvcompressor/vq/__init__.py +15 -0
- gsvvcompressor/vq/interface.py +324 -0
- gsvvcompressor/vq/singlemask.py +127 -0
- gsvvcompressor/vq/twopass.py +1 -0
- gsvvcompressor/xyz/__init__.py +26 -0
- gsvvcompressor/xyz/dense.py +39 -0
- gsvvcompressor/xyz/interface.py +382 -0
- gsvvcompressor/xyz/knn.py +141 -0
- gsvvcompressor/xyz/quant.py +143 -0
- gsvvcompressor/xyz/size.py +44 -0
- gsvvcompressor/xyz/twopass.py +1 -0
- gsvvcompressor-1.2.0.dist-info/METADATA +690 -0
- gsvvcompressor-1.2.0.dist-info/RECORD +50 -0
- gsvvcompressor-1.2.0.dist-info/WHEEL +5 -0
- gsvvcompressor-1.2.0.dist-info/licenses/LICENSE +21 -0
- gsvvcompressor-1.2.0.dist-info/top_level.txt +1 -0
gsvvcompressor/vq/interface.py

@@ -0,0 +1,324 @@
from dataclasses import dataclass, field
from typing import Dict, Self

import torch

from gaussian_splatting import GaussianModel
from reduced_3dgs.quantization import ExcludeZeroSHQuantizer as VectorQuantizer

from ..payload import Payload
from ..interframe import InterframeEncoderInitConfig, InterframeCodecContext, InterframeCodecInterface


@dataclass
class VQInterframeCodecConfig(InterframeEncoderInitConfig):
    """
    Configuration parameters for VQ-based inter-frame codec.

    This dataclass holds the initialization settings for vector quantization,
    including the number of clusters for each attribute type.
    """
    num_clusters: int = 256
    num_clusters_rotation_re: int = 256
    num_clusters_rotation_im: int = 256
    num_clusters_opacity: int = 256
    num_clusters_scaling: int = 256
    num_clusters_features_dc: int = 256
    num_clusters_features_rest: list = field(default_factory=list)
    max_sh_degree: int = 3
    tol: float = 1e-6
    max_iter: int = 500


@dataclass
class VQInterframeCodecContext(InterframeCodecContext):
    """
    Context data for VQ-based inter-frame encoding/decoding.

    This dataclass holds the quantization state, including codebooks
    and cluster IDs for each frame.
    """
    ids_dict: Dict[str, torch.Tensor]
    codebook_dict: Dict[str, torch.Tensor]
    max_sh_degree: int


@dataclass
class VQKeyframePayload(Payload):
    """
    Payload for VQ keyframe data.

    Contains the full codebook and cluster IDs for the first frame.
    """
    ids_dict: Dict[str, torch.Tensor]
    codebook_dict: Dict[str, torch.Tensor]
    max_sh_degree: int

    def to(self, device) -> Self:
        """
        Move the Payload to the specified device.

        Args:
            device: The target device (e.g., 'cpu', 'cuda', torch.device).

        Returns:
            A new VQKeyframePayload instance on the target device.
        """
        return VQKeyframePayload(
            ids_dict={k: v.to(device) for k, v in self.ids_dict.items()},
            codebook_dict={k: v.to(device) for k, v in self.codebook_dict.items()},
            max_sh_degree=self.max_sh_degree,
        )


@dataclass
class VQInterframePayload(Payload):
    """
    Payload for VQ interframe data.

    Contains only the changed cluster IDs for subsequent frames.
    The codebook is inherited from the keyframe context.

    Attributes:
        ids_mask_dict: Dict of boolean tensors indicating which ids changed for each key.
        ids_dict: Dict of only the changed ids values (sparse).
    """
    ids_mask_dict: Dict[str, torch.Tensor]
    ids_dict: Dict[str, torch.Tensor]

    def to(self, device) -> Self:
        """
        Move the Payload to the specified device.

        Args:
            device: The target device (e.g., 'cpu', 'cuda', torch.device).

        Returns:
            A new VQInterframePayload instance on the target device.
        """
        return VQInterframePayload(
            ids_mask_dict={k: v.to(device) for k, v in self.ids_mask_dict.items()},
            ids_dict={k: v.to(device) for k, v in self.ids_dict.items()},
        )


class VQInterframeCodecInterface(InterframeCodecInterface):
    """
    VQ-based inter-frame encoding/decoding interface.

    This interface uses vector quantization to compress Gaussian model attributes.
    The keyframe generates codebooks, and subsequent frames use the same codebooks
    to find nearest cluster IDs.
    """

    def decode_interframe(self, payload: VQInterframePayload, prev_context: VQInterframeCodecContext) -> VQInterframeCodecContext:
        """
        Decode a delta payload to reconstruct the next frame's context.

        Applies the changed values from the payload to the previous context.

        Args:
            payload: The delta payload containing changed cluster IDs with masks.
            prev_context: The context of the previous frame (contains the codebook).

        Returns:
            The reconstructed context for the current frame.
        """
        # Clone previous ids_dict and apply changes
        new_ids_dict = {}
        for key, prev_ids in prev_context.ids_dict.items():
            new_ids = prev_ids.clone()
            mask = payload.ids_mask_dict[key]
            new_ids[mask] = payload.ids_dict[key]
            new_ids_dict[key] = new_ids

        return VQInterframeCodecContext(
            ids_dict=new_ids_dict,
            codebook_dict=prev_context.codebook_dict,
            max_sh_degree=prev_context.max_sh_degree,
        )

    def encode_interframe(self, prev_context: VQInterframeCodecContext, next_context: VQInterframeCodecContext) -> VQInterframePayload:
        """
        Encode the difference between two consecutive frames.

        Compares prev and next contexts to find changed rows and stores only
        the changed values with their corresponding masks.

        Args:
            prev_context: The context of the previous frame.
            next_context: The context of the next frame.

        Returns:
            A payload containing only changed cluster IDs with masks.
        """
        # Find changed ids for each key
        ids_mask_dict = {}
        changed_ids_dict = {}
        for key in next_context.ids_dict.keys():
            prev_ids = prev_context.ids_dict[key]
            next_ids = next_context.ids_dict[key]
            # Compare ids, handling different tensor shapes
            if prev_ids.dim() == 1:
                mask = prev_ids != next_ids
            else:
                mask = (prev_ids != next_ids).any(dim=-1)
            ids_mask_dict[key] = mask
            changed_ids_dict[key] = next_ids[mask]

        return VQInterframePayload(
            ids_mask_dict=ids_mask_dict,
            ids_dict=changed_ids_dict,
        )

    def decode_keyframe(self, payload: VQKeyframePayload) -> VQInterframeCodecContext:
        """
        Decode a keyframe payload to create the initial context.

        Args:
            payload: The keyframe payload containing the full codebook and IDs.

        Returns:
            The context for the first/key frame.
        """
        return VQInterframeCodecContext(
            ids_dict=payload.ids_dict,
            codebook_dict=payload.codebook_dict,
            max_sh_degree=payload.max_sh_degree,
        )

    def decode_keyframe_for_encode(
        self,
        payload: VQKeyframePayload,
        context: VQInterframeCodecContext,
    ) -> VQInterframeCodecContext:
        """
        Decode a keyframe payload during encoding to avoid error accumulation.

        Since the encode/decode round-trip is lossless for VQ
        (the IDs and codebook are simply copied), we can reuse the original context.

        Args:
            payload: The keyframe payload that was just encoded.
            context: The original context used for encoding this keyframe.

        Returns:
            The reconstructed context (same as the original for this codec).
        """
        # Round-trip is lossless, reuse original context
        return context

    def encode_keyframe(self, context: VQInterframeCodecContext) -> VQKeyframePayload:
        """
        Encode the first frame as a keyframe.

        Args:
            context: The context of the first frame.

        Returns:
            A payload containing the full codebook and IDs.
        """
        return VQKeyframePayload(
            ids_dict=context.ids_dict,
            codebook_dict=context.codebook_dict,
            max_sh_degree=context.max_sh_degree,
        )

    def keyframe_to_context(self, frame: GaussianModel, init_config: VQInterframeCodecConfig) -> VQInterframeCodecContext:
        """
        Convert a keyframe to a VQInterframeCodecContext.

        Creates a VectorQuantizer and generates codebooks from the frame.

        Args:
            frame: The GaussianModel frame to convert.
            init_config: Configuration parameters for quantization.

        Returns:
            The corresponding VQInterframeCodecContext representation.
        """
        quantizer = VectorQuantizer(
            num_clusters=init_config.num_clusters,
            num_clusters_rotation_re=init_config.num_clusters_rotation_re,
            num_clusters_rotation_im=init_config.num_clusters_rotation_im,
            num_clusters_opacity=init_config.num_clusters_opacity,
            num_clusters_scaling=init_config.num_clusters_scaling,
            num_clusters_features_dc=init_config.num_clusters_features_dc,
            num_clusters_features_rest=init_config.num_clusters_features_rest,
            max_sh_degree=init_config.max_sh_degree,
            tol=init_config.tol,
            max_iter=init_config.max_iter,
        )

        # Generate codebooks and cluster IDs from the keyframe
        ids_dict, codebook_dict = quantizer.quantize(frame, update_codebook=True)

        return VQInterframeCodecContext(
            ids_dict=ids_dict,
            codebook_dict=codebook_dict,
            max_sh_degree=frame.max_sh_degree,
        )

    def interframe_to_context(
        self,
        frame: GaussianModel,
        prev_context: VQInterframeCodecContext,
    ) -> VQInterframeCodecContext:
        """
        Convert a frame to a VQInterframeCodecContext using the previous context's codebook.

        Uses the codebook from the previous context to find nearest cluster IDs.

        Args:
            frame: The GaussianModel frame to convert.
            prev_context: The context from the previous frame.

        Returns:
            The corresponding VQInterframeCodecContext representation.
        """
        # Create a quantizer with the same settings
        quantizer = VectorQuantizer(
            max_sh_degree=prev_context.max_sh_degree,
        )
        # Set the codebook from the previous context
        quantizer._codebook_dict = prev_context.codebook_dict

        # Find nearest cluster IDs using the existing codebook
        ids_dict = quantizer.find_nearest_cluster_id(frame, prev_context.codebook_dict)

        return VQInterframeCodecContext(
            ids_dict=ids_dict,
            codebook_dict=prev_context.codebook_dict,
            max_sh_degree=prev_context.max_sh_degree,
        )

    def context_to_frame(self, context: VQInterframeCodecContext, frame: GaussianModel) -> GaussianModel:
        """
        Convert a VQInterframeCodecContext back to a GaussianModel frame.

        Dequantizes the cluster IDs using the codebook to reconstruct attributes.
        Does not modify xyz coordinates.

        Args:
            context: The VQInterframeCodecContext to convert.
            frame: An empty GaussianModel or one from previous pipeline steps.
                This frame will be modified in-place with the context data.

        Returns:
            The modified GaussianModel with the frame data.
        """
        # Create a quantizer for dequantization
        quantizer = VectorQuantizer(
            max_sh_degree=context.max_sh_degree,
        )

        # Dequantize to reconstruct the model (xyz=None means don't modify xyz)
        frame = quantizer.dequantize(
            frame,
            context.ids_dict,
            context.codebook_dict,
            xyz=None,
            replace=True,
        )

        return frame
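For orientation, here is a minimal driver sketch showing how the classes above fit together over a frame sequence. It is an assumption about usage, not part of the package's documented API: the `frames` list of GaussianModel instances is hypothetical, and only methods defined in interface.py are called.

# Hypothetical usage sketch; `frames` is an assumed list of GaussianModel
# instances, not something the package provides.
from gsvvcompressor.vq.interface import VQInterframeCodecConfig, VQInterframeCodecInterface

codec = VQInterframeCodecInterface()
config = VQInterframeCodecConfig(num_clusters=256, max_sh_degree=3)

# Keyframe: build codebooks, emit the full payload, then mirror the decoder
# so the encoder's state matches what a decoder would hold.
context = codec.keyframe_to_context(frames[0], config)
payloads = [codec.encode_keyframe(context)]
context = codec.decode_keyframe_for_encode(payloads[-1], context)

# Interframes: reuse the keyframe codebook and store only changed cluster IDs.
for frame in frames[1:]:
    next_context = codec.interframe_to_context(frame, context)
    payloads.append(codec.encode_interframe(context, next_context))
    # Advance by decoding, mirroring the receiver (lossless for this codec).
    context = codec.decode_interframe(payloads[-1], context)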
gsvvcompressor/vq/singlemask.py

@@ -0,0 +1,127 @@
from dataclasses import dataclass
from typing import Dict, Self

import torch

from ..payload import Payload
from .interface import (
    VQInterframeCodecInterface,
    VQInterframeCodecContext,
)


@dataclass
class VQMergeMaskInterframePayload(Payload):
    """
    Payload for VQ interframe data with a single merged mask.

    Contains a single mask that is the OR of all individual attribute masks,
    and all ids arrays have the same length (based on the merged mask).

    Attributes:
        ids_mask: Boolean tensor indicating which positions changed (OR of all masks).
        ids_dict: Dict of changed ids values, all with the same length.
    """
    ids_mask: torch.Tensor
    ids_dict: Dict[str, torch.Tensor]

    def to(self, device) -> Self:
        """
        Move the Payload to the specified device.

        Args:
            device: The target device (e.g., 'cpu', 'cuda', torch.device).

        Returns:
            A new VQMergeMaskInterframePayload instance on the target device.
        """
        return VQMergeMaskInterframePayload(
            ids_mask=self.ids_mask.to(device),
            ids_dict={k: v.to(device) for k, v in self.ids_dict.items()},
        )


class VQMergeMaskInterframeCodecInterface(VQInterframeCodecInterface):
    """
    VQ-based inter-frame encoding/decoding interface with a merged mask.

    This interface extends VQInterframeCodecInterface but uses a single merged mask
    (the OR of all attribute masks) for the interframe payload. All ids arrays have
    the same length, making serialization and compression more efficient.
    """

    def decode_interframe(
        self,
        payload: VQMergeMaskInterframePayload,
        prev_context: VQInterframeCodecContext,
    ) -> VQInterframeCodecContext:
        """
        Decode a delta payload to reconstruct the next frame's context.

        Applies the changed values from the payload to the previous context
        using the single merged mask.

        Args:
            payload: The delta payload containing changed cluster IDs with the merged mask.
            prev_context: The context of the previous frame (contains the codebook).

        Returns:
            The reconstructed context for the current frame.
        """
        # Clone previous ids_dict and apply changes using the merged mask
        new_ids_dict = {}
        mask = payload.ids_mask
        for key, prev_ids in prev_context.ids_dict.items():
            new_ids = prev_ids.clone()
            new_ids[mask] = payload.ids_dict[key]
            new_ids_dict[key] = new_ids

        return VQInterframeCodecContext(
            ids_dict=new_ids_dict,
            codebook_dict=prev_context.codebook_dict,
            max_sh_degree=prev_context.max_sh_degree,
        )

    def encode_interframe(
        self,
        prev_context: VQInterframeCodecContext,
        next_context: VQInterframeCodecContext,
    ) -> VQMergeMaskInterframePayload:
        """
        Encode the difference between two consecutive frames.

        Computes a merged mask (OR of all individual masks) and extracts
        changed values for all attributes using this single mask.

        Args:
            prev_context: The context of the previous frame.
            next_context: The context of the next frame.

        Returns:
            A payload containing changed cluster IDs with a single merged mask.
        """
        # Compute individual masks and merge them
        merged_mask = None
        for key in next_context.ids_dict.keys():
            prev_ids = prev_context.ids_dict[key]
            next_ids = next_context.ids_dict[key]
            # Compare ids, handling different tensor shapes
            if prev_ids.dim() == 1:
                mask = prev_ids != next_ids
            else:
                mask = (prev_ids != next_ids).any(dim=-1)

            if merged_mask is None:
                merged_mask = mask
            else:
                merged_mask = merged_mask | mask

        # Extract changed ids using the merged mask for all keys
        changed_ids_dict = {}
        for key, next_ids in next_context.ids_dict.items():
            changed_ids_dict[key] = next_ids[merged_mask]

        return VQMergeMaskInterframePayload(
            ids_mask=merged_mask,
            ids_dict=changed_ids_dict,
        )
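To make the merged-mask tradeoff concrete, here is a small self-contained sketch (synthetic tensors, not package code) comparing the per-attribute masks of VQInterframePayload with the single OR-ed mask used above:

# Illustrative comparison with synthetic data.
import torch

n = 10_000
prev = {k: torch.randint(0, 256, (n,)) for k in ("opacity", "scaling")}
nxt = {k: v.clone() for k, v in prev.items()}
nxt["opacity"][:100] = (nxt["opacity"][:100] + 1) % 256      # rows 0..99 change
nxt["scaling"][50:150] = (nxt["scaling"][50:150] + 1) % 256  # rows 50..149 change

per_key = {k: prev[k] != nxt[k] for k in prev}    # two masks, 100 changed ids each
merged = per_key["opacity"] | per_key["scaling"]  # one mask, 150 positions set

# Per-key payload: 2 masks + 100 + 100 ids. Merged payload: 1 mask + 150 + 150 ids.
# The merged form stores some unchanged ids where the masks only partly overlap,
# but every ids array shares one length, so rows serialize contiguously.
print(int(per_key["opacity"].sum()), int(merged.sum()))  # 100 150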
gsvvcompressor/vq/twopass.py

@@ -0,0 +1 @@
# TODO: two pass VQ interframe encoder/decoder, produce codebook on the whole 3DGS seq in pass one
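A hedged sketch of what the first pass flagged in this TODO might look like: pooling one attribute across the whole sequence and fitting a single codebook. Plain Lloyd's k-means stands in for the real quantizer (whose API is not assumed here), and `attr()` is a placeholder for extracting one attribute tensor from a frame.

import torch

def fit_sequence_codebook(samples: torch.Tensor, k: int = 256, iters: int = 25) -> torch.Tensor:
    """Lloyd's k-means over (N, D) samples pooled from all frames; returns (k, D) centroids. Assumes N >= k."""
    samples = samples.float()
    centroids = samples[torch.randperm(samples.shape[0])[:k]].clone()
    for _ in range(iters):
        ids = torch.cdist(samples, centroids).argmin(dim=1)  # nearest centroid per sample
        for c in range(k):
            members = samples[ids == c]
            if members.shape[0] > 0:
                centroids[c] = members.mean(dim=0)
    return centroids

# Pass 1: pooled = torch.cat([attr(f) for f in frames]); codebook = fit_sequence_codebook(pooled)
# Pass 2: per-frame ids = torch.cdist(attr(f), codebook).argmin(dim=1) against the fixed codebook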
gsvvcompressor/xyz/__init__.py

@@ -0,0 +1,26 @@
"""
XYZ coordinate processing utilities for volumetric video compression.

This module provides functions for adaptive quantization of 3D point coordinates
based on local point cloud density.
"""

from .quant import XYZQuantConfig
from .interface import (
    XYZQuantInterframeCodecConfig,
    XYZQuantInterframeCodecContext,
    XYZQuantKeyframePayload,
    XYZQuantInterframePayload,
    XYZQuantInterframeCodecInterface,
)

__all__ = [
    # quant.py
    'XYZQuantConfig',
    # interface.py
    'XYZQuantInterframeCodecConfig',
    'XYZQuantInterframeCodecContext',
    'XYZQuantKeyframePayload',
    'XYZQuantInterframePayload',
    'XYZQuantInterframeCodecInterface',
]
gsvvcompressor/xyz/dense.py

@@ -0,0 +1,39 @@
"""
Dense region scale estimation from nearest neighbor distances.
"""

import torch


def compute_dense_scale(
    nn_distances: torch.Tensor,
    quantile: float = 0.05,
) -> float:
    """
    Compute the dense region scale from nearest neighbor distances.

    Takes a low quantile of the NN distances to estimate the characteristic
    scale in the densest regions of the point cloud.

    Args:
        nn_distances: Nearest neighbor distances, shape (M,).
        quantile: The quantile to use, in range (0, 1). E.g., 0.01 or 0.05 for dense regions.

    Returns:
        The dense scale estimate (d_dense).

    Raises:
        ValueError: If all distances are zero (all points are duplicates).
    """
    # Filter out zero distances (can occur with duplicate points)
    valid_distances = nn_distances[nn_distances > 0]
    if valid_distances.numel() == 0:
        raise ValueError(
            "All nearest neighbor distances are zero. "
            "This typically means all points are duplicates."
        )

    # torch.quantile requires float
    dense_scale = torch.quantile(valid_distances.float(), quantile).item()

    return dense_scale
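A hypothetical end-to-end usage sketch (not from the package, which has its own knn.py): deriving nearest-neighbor distances with a brute-force torch.cdist and feeding them to compute_dense_scale.

import torch

from gsvvcompressor.xyz.dense import compute_dense_scale

points = torch.rand(2048, 3)                  # stand-in point cloud
dists = torch.cdist(points, points)           # (N, N) pairwise distances
dists.fill_diagonal_(float("inf"))            # exclude each point's self-distance
nn_distances = dists.min(dim=1).values        # distance to each point's nearest neighbor

d_dense = compute_dense_scale(nn_distances, quantile=0.05)
# d_dense estimates the characteristic spacing in the densest regions and can
# seed an adaptive xyz quantization step, per the xyz module docstring.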