pureini 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pureini-0.1.1/PKG-INFO +14 -0
- pureini-0.1.1/README.md +0 -0
- pureini-0.1.1/pyproject.toml +22 -0
- pureini-0.1.1/src/pureini/__init__.py +32 -0
- pureini-0.1.1/src/pureini/decoder.py +156 -0
- pureini-0.1.1/src/pureini/encoder.py +171 -0
- pureini-0.1.1/src/pureini/encoding_utils.py +288 -0
- pureini-0.1.1/src/pureini/header.py +345 -0
- pureini-0.1.1/src/pureini/jit_codec.py +396 -0
- pureini-0.1.1/src/pureini/py.typed +0 -0
- pureini-0.1.1/src/pureini/types.py +111 -0
pureini-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pureini
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Pure Python implementation of Cloudini point cloud compression
|
|
5
|
+
Author: Marko Bausch
|
|
6
|
+
Author-email: Marko Bausch <60338487+mrkbac@users.noreply.github.com>
|
|
7
|
+
Requires-Dist: lz4>=4.4.4
|
|
8
|
+
Requires-Dist: numba>=0.62.1
|
|
9
|
+
Requires-Dist: numpy>=2.2.0
|
|
10
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
11
|
+
Requires-Dist: zstandard>=0.25.0
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
pureini-0.1.1/README.md
ADDED
|
File without changes
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pureini"
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = "Pure Python implementation of Cloudini point cloud compression"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Marko Bausch", email = "60338487+mrkbac@users.noreply.github.com" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"lz4>=4.4.4",
|
|
12
|
+
"numba>=0.62.1",
|
|
13
|
+
"numpy>=2.2.0",
|
|
14
|
+
"pyyaml>=6.0.3",
|
|
15
|
+
"zstandard>=0.25.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["uv_build>=0.8.9,<0.9.0"]
|
|
20
|
+
build-backend = "uv_build"
|
|
21
|
+
|
|
22
|
+
[dependency-groups]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pureini - Pure Python implementation of Cloudini point cloud compression.
|
|
3
|
+
|
|
4
|
+
Copyright 2025 Davide Faconti
|
|
5
|
+
Licensed under the Apache License, Version 2.0
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .decoder import PointcloudDecoder
|
|
9
|
+
from .encoder import PointcloudEncoder
|
|
10
|
+
from .header import HeaderEncoding, decode_header, encode_header
|
|
11
|
+
from .types import (
|
|
12
|
+
CompressionOption,
|
|
13
|
+
EncodingInfo,
|
|
14
|
+
EncodingOptions,
|
|
15
|
+
FieldType,
|
|
16
|
+
PointField,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__version__ = "0.1.0"
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"CompressionOption",
|
|
23
|
+
"EncodingInfo",
|
|
24
|
+
"EncodingOptions",
|
|
25
|
+
"FieldType",
|
|
26
|
+
"HeaderEncoding",
|
|
27
|
+
"PointField",
|
|
28
|
+
"PointcloudDecoder",
|
|
29
|
+
"PointcloudEncoder",
|
|
30
|
+
"decode_header",
|
|
31
|
+
"encode_header",
|
|
32
|
+
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Point cloud decoder for Pureini compression.
|
|
3
|
+
|
|
4
|
+
Copyright 2025 Davide Faconti
|
|
5
|
+
Licensed under the Apache License, Version 2.0
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import lz4.block
|
|
9
|
+
import numpy as np
|
|
10
|
+
import zstandard as zstd
|
|
11
|
+
|
|
12
|
+
from .encoding_utils import BufferView, build_field_metadata, decode
|
|
13
|
+
from .header import decode_header
|
|
14
|
+
from .jit_codec import decode_chunk_jit
|
|
15
|
+
from .types import (
|
|
16
|
+
POINTS_PER_CHUNK,
|
|
17
|
+
CompressionOption,
|
|
18
|
+
EncodingInfo,
|
|
19
|
+
EncodingOptions,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PointcloudDecoder:
    """
    Point cloud decoder for two-stage decompression.

    Stage 1: General-purpose decompression (LZ4 or ZSTD)
    Stage 2: Field-specific decoding (delta, dequantization, etc.) via Numba JIT
    """

    def __init__(self) -> None:
        """Initialize the decoder."""

    def decode(self, compressed_data: bytes) -> tuple[bytes, EncodingInfo]:
        """
        Decode compressed point cloud data.

        Args:
            compressed_data: Compressed data including header

        Returns:
            Tuple of (decompressed_data, encoding_info)

        Raises:
            RuntimeError: If a chunk size prefix exceeds the remaining input,
                a chunk cannot be decompressed, or a raw (NONE-encoded) chunk
                does not fit in the output buffer sized from the header.
        """
        # Decode header and get bytes consumed
        info, header_size = decode_header(bytes(compressed_data))

        # Create view starting after header
        input_view = BufferView(compressed_data)
        input_view.trim_front(header_size)

        # Build field metadata arrays for JIT
        field_offsets, field_types, field_resolutions = build_field_metadata(info)

        # Allocate output buffer sized from the header's width/height/point_step
        total_points = info.width * info.height
        output_size = total_points * info.point_step
        output = np.zeros(output_size, dtype=np.uint8)
        output_offset = 0

        # NONE encoding means chunks hold raw point bytes: copy, don't transform
        raw_copy = info.encoding_opt == EncodingOptions.NONE

        if info.version >= 3:
            # Version 3+: a sequence of (uint32 size, payload) chunks
            while not input_view.empty():
                chunk_size = int(decode(input_view, "I"))  # uint32

                if chunk_size > input_view.size():
                    raise RuntimeError("Invalid chunk size found while decoding")

                chunk_data = input_view.read_bytes(chunk_size)
                decompressed = self._decompress_chunk(info, chunk_data)

                if raw_copy:
                    output_offset = self._copy_raw_chunk(output, output_offset, decompressed)
                else:
                    # Never let the JIT decoder write past the end of the output
                    remaining_output_bytes = output_size - output_offset
                    max_points_to_decode = remaining_output_bytes // info.point_step

                    points_decoded = decode_chunk_jit(
                        memoryview(decompressed),
                        max_points_to_decode,
                        info.point_step,
                        field_offsets,
                        field_types,
                        field_resolutions,
                        output[output_offset:],
                    )
                    output_offset += points_decoded * info.point_step
        else:
            # Version 2: everything after the header is a single chunk
            decompressed = self._decompress_chunk(info, bytes(input_view.data))

            if raw_copy:
                output_offset = self._copy_raw_chunk(output, 0, decompressed)
            else:
                points_decoded = decode_chunk_jit(
                    memoryview(decompressed),
                    total_points,
                    info.point_step,
                    field_offsets,
                    field_types,
                    field_resolutions,
                    output,
                )
                output_offset = points_decoded * info.point_step

        return bytes(output[:output_offset]), info

    @staticmethod
    def _copy_raw_chunk(output: np.ndarray, output_offset: int, decompressed: bytes) -> int:
        """
        Copy an untransformed chunk into the output buffer.

        Args:
            output: Destination uint8 array
            output_offset: Index in ``output`` at which to start writing
            decompressed: Raw chunk bytes to copy

        Returns:
            The output offset just past the copied bytes

        Raises:
            RuntimeError: If the chunk is larger than the space left in ``output``
        """
        chunk_bytes = len(decompressed)
        available = output.size - output_offset
        if chunk_bytes > available:
            # Without this check NumPy raises an opaque shape-mismatch error
            raise RuntimeError(
                f"Decoded chunk of {chunk_bytes} bytes exceeds the "
                f"{available} bytes remaining in the output buffer"
            )
        output[output_offset : output_offset + chunk_bytes] = np.frombuffer(
            decompressed, dtype=np.uint8
        )
        return output_offset + chunk_bytes

    def _decompress_chunk(self, info: EncodingInfo, chunk_data: bytes) -> bytes:
        """
        Decompress a single chunk.

        Args:
            info: Encoding information
            chunk_data: Compressed chunk data

        Returns:
            Decompressed chunk data

        Raises:
            RuntimeError: If decompression fails or the compression option
                is not one of LZ4, ZSTD or NONE
        """
        if info.compression_opt == CompressionOption.LZ4:
            try:
                # Chunks are written without a stored size, so pass an upper bound
                max_size = POINTS_PER_CHUNK * info.point_step
                return lz4.block.decompress(chunk_data, uncompressed_size=max_size)
            except (ValueError, RuntimeError) as e:
                raise RuntimeError(f"LZ4 decompression failed: {e}") from e

        elif info.compression_opt == CompressionOption.ZSTD:
            try:
                dctx = zstd.ZstdDecompressor()
                return dctx.decompress(chunk_data)
            except (ValueError, RuntimeError, zstd.ZstdError) as e:
                raise RuntimeError(f"ZSTD decompression failed: {e}") from e

        elif info.compression_opt == CompressionOption.NONE:
            # No decompression needed
            return chunk_data

        else:
            raise RuntimeError(f"Unknown compression option: {info.compression_opt}")
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Point cloud encoder for Pureini compression.
|
|
3
|
+
|
|
4
|
+
Copyright 2025 Davide Faconti
|
|
5
|
+
Licensed under the Apache License, Version 2.0
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import struct
|
|
9
|
+
|
|
10
|
+
import lz4.block
|
|
11
|
+
import numpy as np
|
|
12
|
+
import zstandard as zstd
|
|
13
|
+
|
|
14
|
+
from .encoding_utils import BufferView, build_field_metadata
|
|
15
|
+
from .header import HeaderEncoding, encode_header
|
|
16
|
+
from .jit_codec import encode_chunk_jit
|
|
17
|
+
from .types import POINTS_PER_CHUNK, CompressionOption, EncodingInfo, EncodingOptions
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _zstd_compress_bound(src_size: int) -> int:
|
|
21
|
+
"""
|
|
22
|
+
Calculate maximum compressed size for zstd compression.
|
|
23
|
+
|
|
24
|
+
Implements the ZSTD_compressBound formula from the C API:
|
|
25
|
+
srcSize + (srcSize >> 8) + margin_for_small_inputs
|
|
26
|
+
|
|
27
|
+
The Python zstandard library doesn't expose ZSTD_compressBound,
|
|
28
|
+
so we implement it based on the official zstd specification.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
src_size: Size of uncompressed data in bytes
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
Maximum size needed for compressed output buffer
|
|
35
|
+
"""
|
|
36
|
+
base = src_size + (src_size >> 8)
|
|
37
|
+
# Add extra margin for inputs smaller than 128 KB
|
|
38
|
+
if src_size < (128 << 10): # 128 KB = 131072 bytes
|
|
39
|
+
margin = ((128 << 10) - src_size) >> 11
|
|
40
|
+
return base + margin
|
|
41
|
+
return base
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class PointcloudEncoder:
    """
    Two-stage point cloud encoder.

    Stage 1 runs the field-specific encoder (``encode_chunk_jit``) on each
    chunk unless the encoding option is NONE. Stage 2 passes the result
    through the configured general-purpose compressor (LZ4, ZSTD or none).
    Every chunk is written as a little-endian uint32 size followed by the
    payload.
    """

    def __init__(self, info: EncodingInfo) -> None:
        """
        Prepare the header, field metadata and compressor for ``info``.

        Args:
            info: Encoding configuration
        """
        self.info = info
        self.header = encode_header(info, HeaderEncoding.YAML)

        # Per-field arrays consumed by the JIT encoder
        metadata = build_field_metadata(info)
        self.field_offsets, self.field_types, self.field_resolutions = metadata

        # One compressor instance is shared by all chunks
        if info.compression_opt == CompressionOption.ZSTD:
            self._zstd_cctx = zstd.ZstdCompressor(level=1)

    def encode(self, cloud_data: bytes) -> bytes:
        """
        Encode point cloud data.

        Args:
            cloud_data: Raw point cloud bytes

        Returns:
            Compressed point cloud data with header
        """
        # The JIT encoder works on a uint8 numpy view of the input
        point_data = np.frombuffer(cloud_data, dtype=np.uint8)
        raw_size = len(cloud_data)

        points_count = raw_size // self.info.point_step
        chunks_count = (points_count + POINTS_PER_CHUNK - 1) // POINTS_PER_CHUNK

        # Worst-case payload size (2x the input is the conservative non-ZSTD estimate)
        uses_zstd = self.info.compression_opt == CompressionOption.ZSTD
        max_compressed = _zstd_compress_bound(raw_size) if uses_zstd else raw_size * 2

        # header + payload bound + one 4-byte size prefix per chunk
        output_size = len(self.header) + max_compressed + (4 * chunks_count)
        output = bytearray(output_size)
        output_view = BufferView(output)

        output_view.write_bytes(self.header)
        self._encode_chunks(point_data, points_count, output_view)

        # Whatever the view has left is unused tail space
        written = output_size - output_view.size()
        return bytes(output[:written])

    def _encode_chunks(
        self, point_data: np.ndarray, points_count: int, output_view: BufferView
    ) -> None:
        """
        Encode the cloud chunk by chunk and append each chunk to the output.

        Args:
            point_data: Raw point cloud data as numpy array
            points_count: Total number of points
            output_view: Output buffer for compressed chunks
        """
        point_step = self.info.point_step
        passthrough = self.info.encoding_opt == EncodingOptions.NONE

        # Scratch space for one encoded (not yet compressed) chunk
        scratch = np.zeros(POINTS_PER_CHUNK * point_step, dtype=np.uint8)

        next_point = 0
        while next_point < points_count:
            chunk_points = min(POINTS_PER_CHUNK, points_count - next_point)

            if passthrough:
                # NONE encoding: hand the raw bytes of this chunk to the compressor
                first_byte = next_point * point_step
                last_byte = first_byte + (chunk_points * point_step)
                chunk_data = bytes(point_data[first_byte:last_byte])
            else:
                used = encode_chunk_jit(
                    point_data,
                    next_point,
                    chunk_points,
                    point_step,
                    self.field_offsets,
                    self.field_types,
                    self.field_resolutions,
                    scratch,
                )
                chunk_data = bytes(scratch[:used])

            self._write_chunk(chunk_data, output_view)
            next_point += chunk_points

    def _write_chunk(self, chunk_data: bytes, output_view: BufferView) -> None:
        """
        Compress one encoded chunk and write it with its size prefix.

        Args:
            chunk_data: Encoded chunk data
            output_view: Output buffer
        """
        option = self.info.compression_opt

        if option == CompressionOption.NONE:
            payload = chunk_data
        elif option == CompressionOption.ZSTD:
            payload = self._zstd_cctx.compress(chunk_data)
        elif option == CompressionOption.LZ4:
            payload = lz4.block.compress(chunk_data, store_size=False)
        else:
            raise RuntimeError(f"Unknown compression option: {self.info.compression_opt}")

        # uint32 little-endian size, then the payload itself
        struct.pack_into("<I", output_view.data, 0, len(payload))
        output_view.trim_front(4)
        output_view.write_bytes(payload)
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Encoding utilities for Pureini point cloud compression.
|
|
3
|
+
|
|
4
|
+
Copyright 2025 Davide Faconti
|
|
5
|
+
Licensed under the Apache License, Version 2.0
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import struct
|
|
9
|
+
|
|
10
|
+
import numba as nb
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
from .types import EncodingInfo, EncodingOptions, FieldType
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BufferView:
|
|
17
|
+
"""
|
|
18
|
+
A view into a byte buffer that can be consumed as data is written/read.
|
|
19
|
+
Accepts bytes, bytearray, or memoryview. Write operations require a
|
|
20
|
+
writable underlying buffer (bytearray or writable memoryview).
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
def __init__(self, data: bytes | bytearray | memoryview) -> None:
|
|
24
|
+
if isinstance(data, (bytes, bytearray)):
|
|
25
|
+
self._view = memoryview(data)
|
|
26
|
+
else:
|
|
27
|
+
self._view = data
|
|
28
|
+
self._offset = 0
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
def data(self) -> memoryview:
|
|
32
|
+
"""Get the current view of remaining data."""
|
|
33
|
+
return self._view[self._offset :]
|
|
34
|
+
|
|
35
|
+
def size(self) -> int:
|
|
36
|
+
"""Get the size of remaining data."""
|
|
37
|
+
return len(self._view) - self._offset
|
|
38
|
+
|
|
39
|
+
def empty(self) -> bool:
|
|
40
|
+
"""Check if buffer is empty."""
|
|
41
|
+
return self.size() == 0
|
|
42
|
+
|
|
43
|
+
def trim_front(self, n: int) -> None:
|
|
44
|
+
if n > self.size():
|
|
45
|
+
raise RuntimeError(f"Cannot trim {n} bytes, only {self.size()} available")
|
|
46
|
+
self._offset += n
|
|
47
|
+
|
|
48
|
+
def write_bytes(self, data: bytes | bytearray) -> None:
|
|
49
|
+
n = len(data)
|
|
50
|
+
if n > self.size():
|
|
51
|
+
raise RuntimeError(f"Cannot write {n} bytes, only {self.size()} available")
|
|
52
|
+
self.data[:n] = data
|
|
53
|
+
self.trim_front(n)
|
|
54
|
+
|
|
55
|
+
def read_bytes(self, n: int) -> bytes:
|
|
56
|
+
if n > self.size():
|
|
57
|
+
raise RuntimeError(f"Cannot read {n} bytes, only {self.size()} available")
|
|
58
|
+
result = bytes(self.data[:n])
|
|
59
|
+
self.trim_front(n)
|
|
60
|
+
return result
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@nb.njit(cache=True, fastmath=True)
def encode_varint64_to_buffer(value: int, buffer: memoryview, offset: int = 0) -> int:
    """
    Encode a signed 64-bit integer directly to a buffer (zero-copy).
    Value 0 is reserved for NaN, so all values are shifted by +1.

    The value is zigzag-mapped to a non-negative integer, incremented by one,
    and then written as a varint: 7 payload bits per byte, least-significant
    group first, with bit 0x80 set on every byte except the last.

    JIT-compiled with Numba for maximum performance.

    This function does not check that ``buffer`` has room for the encoded
    bytes; the caller must size it.

    Args:
        value: The signed 64-bit integer to encode
        buffer: The target buffer (memoryview)
        offset: Starting offset in the buffer

    Returns:
        Number of bytes written
    """
    # Zigzag encoding: non-negative v -> 2v, negative v -> 2*(-v - 1) + 1
    # NOTE(review): under Numba `val` is presumably a fixed-width int64, so
    # values near the int64 limits may wrap here - confirm.
    val = value << 1 if value >= 0 else ((-value - 1) << 1) | 1

    # Reserve value 0 for NaN
    val += 1

    # Varint encoding - write directly to buffer
    ptr = offset
    while val > 0x7F:
        # Low 7 bits plus the continuation flag
        buffer[ptr] = (val & 0x7F) | 0x80
        val >>= 7
        ptr += 1
    # Final byte: continuation flag clear
    buffer[ptr] = val & 0xFF
    ptr += 1

    return ptr - offset
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def encode_varint64(value: int) -> bytes:
    """
    Return the zigzag + varint encoding of a signed 64-bit integer.

    The encoded value 0 is reserved for NaN, so the zigzag result is shifted
    by +1 before being split into 7-bit groups.

    Args:
        value: The signed 64-bit integer to encode

    Returns:
        Varint-encoded bytes

    Note: This creates a new bytearray. For better performance,
    use encode_varint64_to_buffer() to write directly to a buffer.
    """
    # Zigzag: non-negative v -> 2v, negative v -> 2*(-v - 1) + 1
    if value >= 0:
        zigzag = value << 1
    else:
        zigzag = ((-value - 1) << 1) | 1

    # Shift by one so that 0 stays free for NaN
    remaining = zigzag + 1

    # Emit 7 bits at a time, least-significant group first
    out = bytearray()
    while remaining > 0x7F:
        out.append((remaining & 0x7F) | 0x80)
        remaining >>= 7
    out.append(remaining & 0xFF)

    return bytes(out)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@nb.njit(cache=True, fastmath=True)
def decode_varint(data: bytes | memoryview, offset: int = 0) -> tuple[int, int]:
    """
    Decode a zigzag-encoded varint from bytes.
    Handles the NaN reservation (value 0 is NaN).

    Bytes are read from ``offset`` until one has its 0x80 bit clear. The
    accumulated value is decremented by one to undo the NaN shift and then
    zigzag-decoded.

    JIT-compiled with Numba for maximum performance.

    Args:
        data: The byte buffer
        offset: Starting offset in the buffer

    Returns:
        Tuple of (decoded_value, bytes_consumed)

    Raises:
        RuntimeError: If the buffer ends before a terminating byte is found
    """
    uval = 0
    shift = 0
    ptr = offset

    while True:
        if ptr >= len(data):
            raise RuntimeError("Incomplete varint in buffer")

        byte = data[ptr]
        ptr += 1
        # Each byte contributes 7 payload bits, least-significant group first
        # NOTE(review): there is no cap on `shift`; an input with many
        # continuation bytes presumably shifts past 64 bits - confirm.
        uval |= (byte & 0x7F) << shift
        shift += 7

        # A clear high bit marks the last byte of the varint
        if (byte & 0x80) == 0:
            break

    # Value 0 is reserved for NaN
    # NOTE(review): an encoded 0 is not reported specially here; presumably
    # the caller checks for the NaN marker first - confirm.
    uval -= 1

    # Zigzag decoding (branchless)
    val = (uval >> 1) ^ -(uval & 1)

    return val, ptr - offset
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def encode(value: float, buff: BufferView, format_char: str) -> None:
    """
    Pack one primitive value as little-endian and append it to the buffer.

    Args:
        value: The value to encode
        buff: The buffer view to write to
        format_char: struct format character (e.g., 'f' for float, 'I' for uint32)
    """
    # "<" selects little-endian, standard-size packing
    packer = struct.Struct(f"<{format_char}")
    buff.write_bytes(packer.pack(value))
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def encode_string(s: str, buff: BufferView) -> None:
    """
    Write a string as a uint16 byte length followed by its UTF-8 bytes.

    Args:
        s: The string to encode
        buff: The buffer view to write to

    Raises:
        ValueError: If the UTF-8 form is longer than 65535 bytes
    """
    payload = s.encode("utf-8")
    byte_count = len(payload)

    # The length prefix is a uint16, so longer strings cannot be represented
    if byte_count > 65535:
        raise ValueError(f"String too long: {byte_count} bytes (max 65535)")

    encode(byte_count, buff, "H")
    buff.write_bytes(payload)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def decode(buff: BufferView, format_char: str) -> int | float:
    """
    Read one little-endian primitive value from the buffer.

    Args:
        buff: The buffer view to read from
        format_char: struct format character

    Returns:
        The decoded value
    """
    # "<" selects little-endian, standard-size unpacking
    unpacker = struct.Struct(f"<{format_char}")
    raw = buff.read_bytes(unpacker.size)
    return unpacker.unpack(raw)[0]
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def decode_string(buff: BufferView) -> str:
    """
    Read a string stored as a uint16 byte length followed by UTF-8 bytes.

    Args:
        buff: The buffer view to read from

    Returns:
        The decoded string
    """
    byte_count = int(decode(buff, "H"))
    return buff.read_bytes(byte_count).decode("utf-8")
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def to_int64(data: bytes | bytearray | memoryview, offset: int, dtype: str) -> int:
|
|
232
|
+
"""
|
|
233
|
+
Read a value from bytes and convert to int64.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
data: The byte buffer (bytes, bytearray, or memoryview)
|
|
237
|
+
offset: Offset in the buffer
|
|
238
|
+
dtype: Data type ('b', 'B', 'h', 'H', 'i', 'I', 'q', 'Q')
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
The value as int64
|
|
242
|
+
"""
|
|
243
|
+
value = struct.unpack_from(f"<{dtype}", data, offset)[0]
|
|
244
|
+
return int(value)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def build_field_metadata(
    info: EncodingInfo,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Flatten the field descriptions in ``info`` into arrays for the JIT code.

    Three parallel arrays are produced, one entry per field:
    - field_offsets: byte offset of each field (int32)
    - field_types: type code for each field (int32)
    - field_resolutions: resolution for each field (float64)

    Resolution sentinel values:
    > 0.0  -> lossy quantize+delta+varint
    == 0.0 -> XOR lossless (FLOAT64 only)
    -1.0   -> raw 4-byte copy (FLOAT32 without resolution)

    Args:
        info: Encoding configuration

    Returns:
        Tuple of (field_offsets, field_types, field_resolutions)
    """
    field_count = len(info.fields)
    lossy = info.encoding_opt == EncodingOptions.LOSSY

    field_offsets = np.zeros(field_count, dtype=np.int32)
    field_types = np.zeros(field_count, dtype=np.int32)
    field_resolutions = np.zeros(field_count, dtype=np.float64)

    for position, field in enumerate(info.fields):
        field_offsets[position] = field.offset
        field_types[position] = int(field.type)

        if lossy and field.resolution is not None:
            # An explicit resolution only applies in lossy mode
            resolution = field.resolution
        elif field.type == FieldType.FLOAT32:
            resolution = -1.0
        else:
            # FLOAT64 and every remaining type use 0.0
            resolution = 0.0
        field_resolutions[position] = resolution

    return field_offsets, field_types, field_resolutions
|