numcodecs 0.16.0__cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numcodecs might be problematic. Click here for more details.
- numcodecs/__init__.py +146 -0
- numcodecs/_shuffle.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/abc.py +127 -0
- numcodecs/astype.py +72 -0
- numcodecs/base64.py +26 -0
- numcodecs/bitround.py +80 -0
- numcodecs/blosc.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +98 -0
- numcodecs/checksum32.py +183 -0
- numcodecs/compat.py +206 -0
- numcodecs/compat_ext.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/delta.py +94 -0
- numcodecs/errors.py +26 -0
- numcodecs/fixedscaleoffset.py +130 -0
- numcodecs/fletcher32.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/gzip.py +50 -0
- numcodecs/jenkins.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/lzma.py +72 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +82 -0
- numcodecs/pcodec.py +118 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +98 -0
- numcodecs/registry.py +74 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +285 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +284 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +154 -0
- numcodecs/tests/test_compat.py +111 -0
- numcodecs/tests/test_delta.py +61 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +36 -0
- numcodecs/tests/test_fixedscaleoffset.py +77 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +94 -0
- numcodecs/tests/test_msgpacks.py +126 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +90 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +43 -0
- numcodecs/tests/test_shuffle.py +166 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +97 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zarr3.py +279 -0
- numcodecs/tests/test_zarr3_import.py +13 -0
- numcodecs/tests/test_zfpy.py +104 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +92 -0
- numcodecs/version.py +21 -0
- numcodecs/vlen.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/zarr3.py +401 -0
- numcodecs/zfpy.py +113 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs-0.16.0.dist-info/METADATA +66 -0
- numcodecs-0.16.0.dist-info/RECORD +79 -0
- numcodecs-0.16.0.dist-info/WHEEL +6 -0
- numcodecs-0.16.0.dist-info/entry_points.txt +22 -0
- numcodecs-0.16.0.dist-info/licenses/LICENSE.txt +21 -0
- numcodecs-0.16.0.dist-info/top_level.txt +1 -0
numcodecs/checksum32.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import struct
|
|
3
|
+
import zlib
|
|
4
|
+
from contextlib import suppress
|
|
5
|
+
from types import ModuleType
|
|
6
|
+
from typing import Literal, Optional
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from typing_extensions import Buffer
|
|
10
|
+
|
|
11
|
+
from .abc import Codec
|
|
12
|
+
from .compat import ensure_contiguous_ndarray, ndarray_copy
|
|
13
|
+
from .jenkins import jenkins_lookup3
|
|
14
|
+
|
|
15
|
+
_crc32c: Optional[ModuleType] = None
|
|
16
|
+
with suppress(ImportError):
|
|
17
|
+
import crc32c as _crc32c # type: ignore[no-redef, unused-ignore]
|
|
18
|
+
|
|
19
|
+
CHECKSUM_LOCATION = Literal['start', 'end']
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Checksum32(Codec, abc.ABC):
    """Abstract base codec framing a payload with a 32-bit checksum.

    ``encode`` stores the 4-byte little-endian checksum of the payload at
    ``self.location`` ('start' or 'end'); ``decode`` verifies the stored
    checksum against a recomputed one and returns the payload.
    """

    # override in sub-class
    location: CHECKSUM_LOCATION = 'start'

    def __init__(self, location: CHECKSUM_LOCATION | None = None):
        # Keep the class-level default unless an explicit location is given.
        if location is not None:
            self.location = location
        if self.location not in ['start', 'end']:
            raise ValueError(f"Invalid checksum location: {self.location}")

    def encode(self, buf):
        """Return a new byte buffer containing `buf` plus its 32-bit checksum."""
        arr = ensure_contiguous_ndarray(buf).view('u1')
        # mask to 32 bits in case the checksum implementation returns a wider int
        checksum = self.checksum(arr) & 0xFFFFFFFF
        enc = np.empty(arr.nbytes + 4, dtype='u1')
        if self.location == 'start':
            checksum_view = enc[:4]
            payload_view = enc[4:]
        else:
            checksum_view = enc[-4:]
            payload_view = enc[:-4]
        # checksum is always stored little-endian, regardless of platform
        checksum_view.view('<u4')[0] = checksum
        ndarray_copy(arr, payload_view)
        return enc

    def decode(self, buf, out=None):
        """Verify the stored checksum and return the payload.

        Raises
        ------
        ValueError
            If `buf` is shorter than the 4-byte checksum.
        RuntimeError
            If the stored and computed checksums do not match.
        """
        if len(buf) < 4:
            raise ValueError("Input buffer is too short to contain a 32-bit checksum.")
        if out is not None:
            ensure_contiguous_ndarray(out)  # check that out is a valid ndarray

        arr = ensure_contiguous_ndarray(buf).view('u1')
        if self.location == 'start':
            checksum_view = arr[:4]
            payload_view = arr[4:]
        else:
            checksum_view = arr[-4:]
            payload_view = arr[:-4]
        expect = checksum_view.view('<u4')[0]
        checksum = self.checksum(payload_view) & 0xFFFFFFFF
        if expect != checksum:
            raise RuntimeError(
                f"Stored and computed {self.codec_id} checksum do not match. Stored: {expect}. Computed: {checksum}."
            )
        # copy payload into `out` if given, else return the payload view
        return ndarray_copy(payload_view, out)

    @staticmethod
    @abc.abstractmethod
    def checksum(data: Buffer, value: int) -> int: ...
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class CRC32(Checksum32):
    """Codec framing data with a CRC-32 checksum.

    Parameters
    ----------
    location : 'start' or 'end'
        Where to place the checksum in the buffer.
    """

    codec_id = 'crc32'
    location = 'start'

    @staticmethod
    def checksum(data: Buffer, value: int = 0) -> int:
        """Compute the CRC-32 of `data`, delegating to ``zlib.crc32``."""
        crc = zlib.crc32(data, value)
        return crc
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Adler32(Checksum32):
    """Codec framing data with an Adler-32 checksum.

    Parameters
    ----------
    location : 'start' or 'end'
        Where to place the checksum in the buffer.
    """

    codec_id = 'adler32'
    location = 'start'

    @staticmethod
    def checksum(data: Buffer, value: int = 1) -> int:
        """Compute the Adler-32 of `data`, delegating to ``zlib.adler32``."""
        result = zlib.adler32(data, value)
        return result
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class JenkinsLookup3(Checksum32):
    """Bob Jenkin's lookup3 checksum with 32-bit output

    This is the HDF5 implementation.
    https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.

    Parameters
    ----------
    initval : int
        initial seed passed to the hash algorithm, default: 0
    prefix : int
        bytes prepended to the buffer before evaluating the hash, default: None
    """

    # overrides the abstract staticmethod with the C-extension implementation
    checksum = jenkins_lookup3
    codec_id = "jenkins_lookup3"

    def __init__(self, initval: int = 0, prefix=None):
        # seed for the hash algorithm
        self.initval = initval
        if prefix is None:
            self.prefix = None
        else:
            # store the prefix as raw bytes so it can be hashed with the payload
            self.prefix = np.frombuffer(prefix, dtype='uint8')

    def encode(self, buf):
        """Return buffer plus 4-byte Bob Jenkin's lookup3 checksum"""
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        if self.prefix is None:
            val = jenkins_lookup3(buf, self.initval)
        else:
            # hash prefix || payload as one stream
            val = jenkins_lookup3(np.hstack((self.prefix, buf)), self.initval)
        # checksum is appended little-endian
        return buf.tobytes() + struct.pack("<I", val)

    def decode(self, buf, out=None):
        """Check Bob Jenkin's lookup3 checksum, and return buffer without it"""
        b = ensure_contiguous_ndarray(buf).view('uint8')
        # hash everything except the trailing 4-byte checksum
        if self.prefix is None:
            val = jenkins_lookup3(b[:-4], self.initval)
        else:
            val = jenkins_lookup3(np.hstack((self.prefix, b[:-4])), self.initval)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The Bob Jenkin's lookup3 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        if out is not None:
            # NOTE(review): assumes `out` is an ndarray-like exposing .view — TODO confirm
            out.view("uint8")[:] = b[:-4]
            return out
        return memoryview(b[:-4])
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
if _crc32c:
    # CRC32C is only defined when the optional ``crc32c`` package is installed.

    class CRC32C(Checksum32):
        """Codec add a crc32c checksum to the buffer.

        Parameters
        ----------
        location : 'start' or 'end'
            Where to place the checksum in the buffer.
        """

        codec_id = 'crc32c'
        # delegate to the C implementation provided by the crc32c package
        checksum = _crc32c.crc32c  # type: ignore[union-attr]
        location = 'end'
|
numcodecs/compat.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import array
|
|
2
|
+
import codecs
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from .ndarray_like import NDArrayLike, is_ndarray_like
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def ensure_ndarray_like(buf) -> NDArrayLike:
    """Coerce `buf` to an ndarray-like object without copying.

    Parameters
    ----------
    buf : ndarray-like, array-like, or bytes-like
        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
        any object exporting a buffer interface.

    Returns
    -------
    arr : NDArrayLike
        A ndarray-like, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is
    guaranteed to return a view on memory exported by `buf`.
    """
    # already ndarray-like: hand it straight back
    if is_ndarray_like(buf):
        return buf

    # array.array with unicode type is problematic because numpy does not
    # support it on all platforms; char was removed in Python 3
    if isinstance(buf, array.array) and buf.typecode in "cu":
        raise TypeError("array.array with char or unicode type is not supported")

    # go through a memoryview so the subsequent numpy array is guaranteed
    # to be constructed from a memory buffer with no copy
    mem = memoryview(buf)
    return np.array(mem, copy=False)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def ensure_ndarray(buf) -> np.ndarray:
    """Coerce `buf` to a numpy array without copying.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is
    guaranteed to return a view on memory exported by `buf`.
    """
    arr_like = ensure_ndarray_like(buf)
    # copy=False guarantees a view rather than a fresh allocation
    return np.array(arr_like, copy=False)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def ensure_contiguous_ndarray_like(buf, max_buffer_size=None, flatten=True) -> NDArrayLike:
    """Coerce `buf` to an ndarray-like array exporting contiguous memory.

    Parameters
    ----------
    buf : ndarray-like, array-like, or bytes-like
        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
        any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the returned array is flattened to one dimension.

    Returns
    -------
    arr : NDArrayLike
        A ndarray-like, sharing memory with `buf`.

    Raises
    ------
    TypeError
        If `buf` is an object array.
    ValueError
        If memory is not contiguous, or nbytes exceeds `max_buffer_size`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is
    guaranteed to return a view on memory exported by `buf`.
    """
    arr = ensure_ndarray_like(buf)

    # object arrays are just memory pointers; the actual item data is
    # scattered elsewhere and cannot be exported via a buffer
    if arr.dtype == object:
        raise TypeError("object arrays are not supported")

    # the buffer interface doesn't support datetime/timedelta, so reinterpret
    if arr.dtype.kind in "Mm":
        arr = arr.view(np.int64)  # type: ignore[arg-type]

    # contiguity is required; flattening a contiguous array is copy-free
    if not (arr.flags.c_contiguous or arr.flags.f_contiguous):
        raise ValueError("an array with contiguous memory is required")
    if flatten:
        arr = arr.reshape(-1, order="A")

    if max_buffer_size is not None and arr.nbytes > max_buffer_size:
        msg = f"Codec does not support buffers of > {max_buffer_size} bytes"
        raise ValueError(msg)

    return arr
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.ndarray:
    """Coerce `buf` to a numpy array exporting fully contiguous memory.

    If the optional `max_buffer_size` is provided, raise a ValueError if the
    number of bytes consumed by the returned array exceeds this value.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the returned array is flattened to one dimension.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is
    guaranteed to return a view on memory exported by `buf`.
    """
    arr_like = ensure_contiguous_ndarray_like(
        buf, max_buffer_size=max_buffer_size, flatten=flatten
    )
    return ensure_ndarray(arr_like)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def ensure_bytes(buf) -> bytes:
    """Obtain a bytes object from memory exposed by `buf`."""
    # bytes pass straight through without copying
    if isinstance(buf, bytes):
        return buf

    arr = ensure_ndarray_like(buf)

    # object arrays are just memory pointers; actual item data is scattered
    # elsewhere and cannot be serialized directly
    if arr.dtype == object:
        raise TypeError("object arrays are not supported")

    return arr.tobytes(order="A")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def ensure_text(s, encoding="utf-8"):
    """Decode `s` to ``str`` using `encoding`, unless it already is one."""
    if isinstance(s, str):
        return s
    raw = ensure_contiguous_ndarray(s)
    return codecs.decode(raw, encoding)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def ndarray_copy(src, dst) -> NDArrayLike:
    """Copy the contents of the array from `src` to `dst`.

    When `dst` is None this is a no-op and `src` is returned unchanged.
    """
    if dst is None:
        return src

    # normalise both sides to ndarray-like views
    src = ensure_ndarray_like(src)
    dst = ensure_ndarray_like(dst)

    # flatten source so it can be reinterpreted and reshaped freely
    src = src.reshape(-1, order="A")

    # reinterpret source bytes in the destination's dtype (object excluded:
    # an object array holds pointers and cannot be viewed as another dtype)
    if dst.dtype != object:
        src = src.view(dst.dtype)

    # match the destination's shape, honouring its memory layout
    if src.shape != dst.shape:
        order = "F" if dst.flags.f_contiguous else "C"
        src = src.reshape(dst.shape, order=order)

    np.copyto(dst, src)
    return dst
|
|
Binary file
|
numcodecs/delta.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Delta(Codec):
    """Codec to encode data as the difference between adjacent values.

    Parameters
    ----------
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Notes
    -----
    If `astype` is an integer data type, please ensure that it is
    sufficiently large to store encoded values. No checks are made and data
    may become corrupted due to integer overflow if `astype` is too small.
    Note also that the encoded data for each chunk includes the absolute
    value of the first element in the chunk, and so the encoded data type in
    general needs to be large enough to store absolute values from the array.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype='i2')
    >>> codec = numcodecs.Delta(dtype='i2', astype='i1')
    >>> y = codec.encode(x)
    >>> y
    array([100,   2,   2,   2,   2,   2,   2,   2,   2,   2], dtype=int8)
    >>> z = codec.decode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int16)

    """

    codec_id = 'delta'

    def __init__(self, dtype, astype=None):
        self.dtype = np.dtype(dtype)
        if astype is None:
            self.astype = self.dtype
        else:
            self.astype = np.dtype(astype)
        if self.dtype == np.dtype(object) or self.astype == np.dtype(object):
            raise ValueError('object arrays are not supported')

    def encode(self, buf):
        """Encode `buf` as first value followed by adjacent differences."""
        # normalise input
        arr = ensure_ndarray(buf).view(self.dtype)

        # flatten to simplify implementation
        arr = arr.reshape(-1, order='A')

        # setup encoded output
        enc = np.empty_like(arr, dtype=self.astype)

        # guard against empty input: indexing enc[0]/arr[0] would raise
        # IndexError; an empty buffer simply encodes to an empty buffer
        if arr.size:
            # set first element (stored as an absolute value)
            enc[0] = arr[0]

            # compute differences between adjacent values
            enc[1:] = np.diff(arr)

        return enc

    def decode(self, buf, out=None):
        """Reverse the delta encoding via a cumulative sum."""
        # normalise input
        enc = ensure_ndarray(buf).view(self.astype)

        # flatten to simplify implementation
        enc = enc.reshape(-1, order='A')

        # setup decoded output
        dec = np.empty_like(enc, dtype=self.dtype)

        # decode differences; cumsum of an empty array is a no-op
        np.cumsum(enc, out=dec)

        # handle output
        return ndarray_copy(dec, out)

    def get_config(self):
        # override to handle encoding dtypes
        return {'id': self.codec_id, 'dtype': self.dtype.str, 'astype': self.astype.str}

    def __repr__(self):
        r = f'{type(self).__name__}(dtype={self.dtype.str!r}'
        if self.astype != self.dtype:
            r += f', astype={self.astype.str!r}'
        r += ')'
        return r
|
numcodecs/errors.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines custom exceptions that are raised in the `numcodecs` codebase.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UnknownCodecError(ValueError):
    """Raised when a requested codec has not been registered.

    Parameters
    ----------
    codec_id : str
        Codec identifier.

    Examples
    ----------
    >>> import numcodecs
    >>> numcodecs.get_codec({"codec_id": "unknown"})
    Traceback (most recent call last):
    ...
    UnknownCodecError: codec not available: 'unknown'
    """

    def __init__(self, codec_id: str):
        # keep the offending identifier available for programmatic handling
        self.codec_id = codec_id
        message = f"codec not available: '{codec_id}'"
        super().__init__(message)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FixedScaleOffset(Codec):
    """Simplified version of the scale-offset filter available in HDF5.
    Applies the transformation `(x - offset) * scale` to all chunks. Results
    are rounded to the nearest integer but are not packed according to the
    minimum number of bits.

    Parameters
    ----------
    offset : float
        Value to subtract from data.
    scale : float
        Value to multiply by data.
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Notes
    -----
    If `astype` is an integer data type, please ensure that it is
    sufficiently large to store encoded values. No checks are made and data
    may become corrupted due to integer overflow if `astype` is too small.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.linspace(1000, 1001, 10, dtype='f8')
    >>> x
    array([1000.        , 1000.11111111, 1000.22222222, 1000.33333333,
           1000.44444444, 1000.55555556, 1000.66666667, 1000.77777778,
           1000.88888889, 1001.        ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype='f8', astype='u1')
    >>> y1 = codec.encode(x)
    >>> y1
    array([ 0,  1,  2,  3,  4,  6,  7,  8,  9, 10], dtype=uint8)
    >>> z1 = codec.decode(y1)
    >>> z1
    array([1000. , 1000.1, 1000.2, 1000.3, 1000.4, 1000.6, 1000.7,
           1000.8, 1000.9, 1001. ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10**2, dtype='f8', astype='u1')
    >>> y2 = codec.encode(x)
    >>> y2
    array([  0,  11,  22,  33,  44,  56,  67,  78,  89, 100], dtype=uint8)
    >>> z2 = codec.decode(y2)
    >>> z2
    array([1000.  , 1000.11, 1000.22, 1000.33, 1000.44, 1000.56,
           1000.67, 1000.78, 1000.89, 1001.  ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10**3, dtype='f8', astype='u2')
    >>> y3 = codec.encode(x)
    >>> y3
    array([   0,  111,  222,  333,  444,  556,  667,  778,  889, 1000], dtype=uint16)
    >>> z3 = codec.decode(y3)
    >>> z3
    array([1000.   , 1000.111, 1000.222, 1000.333, 1000.444, 1000.556,
           1000.667, 1000.778, 1000.889, 1001.   ])

    See Also
    --------
    numcodecs.quantize.Quantize

    """

    codec_id = 'fixedscaleoffset'

    def __init__(self, offset, scale, dtype, astype=None):
        self.offset = offset
        self.scale = scale
        self.dtype = np.dtype(dtype)
        self.astype = self.dtype if astype is None else np.dtype(astype)
        if self.dtype == np.dtype(object) or self.astype == np.dtype(object):
            raise ValueError('object arrays are not supported')

    def encode(self, buf):
        """Apply ``round((x - offset) * scale)`` and cast to the encoded dtype."""
        # interpret and flatten the input without copying
        arr = ensure_ndarray(buf).view(self.dtype).reshape(-1, order='A')

        # scale-offset transform, rounded to the nearest integer
        scaled = np.around((arr - self.offset) * self.scale)

        # cast to the storage dtype
        return scaled.astype(self.astype, copy=False)

    def decode(self, buf, out=None):
        """Invert the scale-offset transform and restore the decoded dtype."""
        # interpret and flatten the encoded buffer without copying
        enc = ensure_ndarray(buf).view(self.astype).reshape(-1, order='A')

        # invert the transform and cast back to the decoded dtype
        dec = ((enc / self.scale) + self.offset).astype(self.dtype, copy=False)

        # copy into `out` if provided
        return ndarray_copy(dec, out)

    def get_config(self):
        # override to handle encoding dtypes
        return {
            'id': self.codec_id,
            'scale': self.scale,
            'offset': self.offset,
            'dtype': self.dtype.str,
            'astype': self.astype.str,
        }

    def __repr__(self):
        parts = [f'scale={self.scale}', f'offset={self.offset}', f'dtype={self.dtype.str!r}']
        if self.astype != self.dtype:
            parts.append(f'astype={self.astype.str!r}')
        return f"{type(self).__name__}({', '.join(parts)})"
|
|
Binary file
|
numcodecs/gzip.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import gzip as _gzip
|
|
2
|
+
import io
|
|
3
|
+
|
|
4
|
+
from .abc import Codec
|
|
5
|
+
from .compat import ensure_bytes, ensure_contiguous_ndarray
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GZip(Codec):
    """Codec providing gzip compression using zlib via the Python standard library.

    Parameters
    ----------
    level : int
        Compression level.

    """

    codec_id = 'gzip'

    def __init__(self, level=1):
        self.level = level

    def encode(self, buf):
        """Gzip-compress `buf` and return the compressed bytes."""
        data = ensure_contiguous_ndarray(buf)

        stream = io.BytesIO()
        with _gzip.GzipFile(fileobj=stream, mode='wb', compresslevel=self.level) as writer:
            writer.write(data)
        return stream.getvalue()

    # noinspection PyMethodMayBeStatic
    def decode(self, buf, out=None):
        """Decompress `buf`, optionally writing the result into `out`."""
        # BytesIO only copies if the data is not of `bytes` type,
        # so `bytes` objects pass through without copying
        stream = io.BytesIO(ensure_bytes(buf))

        with _gzip.GzipFile(fileobj=stream, mode='rb') as reader:
            if out is None:
                return reader.read()
            out_view = ensure_contiguous_ndarray(out)
            reader.readinto(out_view)
            # any byte left over means `out` was too small for the payload
            if reader.read(1) != b'':
                raise ValueError("Unable to fit data into `out`")
            return out
|
|
Binary file
|