numcodecs 0.16.0__cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numcodecs has been flagged as potentially problematic; consult the registry's advisory page for details.
- numcodecs/__init__.py +146 -0
- numcodecs/_shuffle.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/abc.py +127 -0
- numcodecs/astype.py +72 -0
- numcodecs/base64.py +26 -0
- numcodecs/bitround.py +80 -0
- numcodecs/blosc.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +98 -0
- numcodecs/checksum32.py +183 -0
- numcodecs/compat.py +206 -0
- numcodecs/compat_ext.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/delta.py +94 -0
- numcodecs/errors.py +26 -0
- numcodecs/fixedscaleoffset.py +130 -0
- numcodecs/fletcher32.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/gzip.py +50 -0
- numcodecs/jenkins.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/lzma.py +72 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +82 -0
- numcodecs/pcodec.py +118 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +98 -0
- numcodecs/registry.py +74 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +285 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +284 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +154 -0
- numcodecs/tests/test_compat.py +111 -0
- numcodecs/tests/test_delta.py +61 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +36 -0
- numcodecs/tests/test_fixedscaleoffset.py +77 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +94 -0
- numcodecs/tests/test_msgpacks.py +126 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +90 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +43 -0
- numcodecs/tests/test_shuffle.py +166 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +97 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zarr3.py +279 -0
- numcodecs/tests/test_zarr3_import.py +13 -0
- numcodecs/tests/test_zfpy.py +104 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +92 -0
- numcodecs/version.py +21 -0
- numcodecs/vlen.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs/zarr3.py +401 -0
- numcodecs/zfpy.py +113 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-311-aarch64-linux-gnu.so +0 -0
- numcodecs-0.16.0.dist-info/METADATA +66 -0
- numcodecs-0.16.0.dist-info/RECORD +79 -0
- numcodecs-0.16.0.dist-info/WHEEL +6 -0
- numcodecs-0.16.0.dist-info/entry_points.txt +22 -0
- numcodecs-0.16.0.dist-info/licenses/LICENSE.txt +21 -0
- numcodecs-0.16.0.dist-info/top_level.txt +1 -0
|
"""Tests for the Blosc codec (``numcodecs.blosc``).

The module is skipped entirely when the compiled ``blosc`` extension is not
available.  Module-level state ``blosc.use_threads`` is toggled via the
``use_threads`` fixture and restored where tests mutate it long-term.
"""

from multiprocessing import Pool
from multiprocessing.pool import ThreadPool

import numpy as np
import pytest

try:
    from numcodecs import blosc
    from numcodecs.blosc import Blosc
except ImportError:  # pragma: no cover
    pytest.skip("numcodecs.blosc not available", allow_module_level=True)


from numcodecs.tests.common import (
    check_backwards_compatibility,
    check_config,
    check_encode_decode,
    check_err_decode_object_buffer,
    check_err_encode_object_buffer,
    check_max_buffer_size,
)

# Codec configurations exercised by the round-trip and compatibility tests.
# The ``None`` entry is a placeholder for the removed snappy codec; it is
# kept so fixture indices used by check_backwards_compatibility stay stable.
codecs = [
    Blosc(shuffle=Blosc.SHUFFLE),
    Blosc(clevel=0, shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE),
    Blosc(cname='lz4', clevel=5, shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=9, shuffle=Blosc.BITSHUFFLE),
    Blosc(cname='zlib', clevel=1, shuffle=0),
    Blosc(cname='zstd', clevel=1, shuffle=1),
    Blosc(cname='blosclz', clevel=1, shuffle=2),
    None,  # was snappy
    Blosc(shuffle=Blosc.SHUFFLE, blocksize=0),
    Blosc(shuffle=Blosc.SHUFFLE, blocksize=2**8),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE, blocksize=2**8),
]


# mix of dtypes: integer, float, bool, string
# mix of shapes: 1D, 2D, 3D
# mix of orders: C, F
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8'),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]'),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]'),
]


def _skip_null(codec):
    """Skip the current test when ``codec`` is the removed-codec placeholder."""
    if codec is None:
        pytest.skip("codec has been removed")


@pytest.fixture(scope='module', params=[True, False, None])
def use_threads(request):
    """Parametrize over the three possible ``blosc.use_threads`` settings."""
    return request.param


@pytest.mark.parametrize('array', arrays)
@pytest.mark.parametrize('codec', codecs)
def test_encode_decode(array, codec):
    _skip_null(codec)
    check_encode_decode(array, codec)


def test_config():
    codec = Blosc(cname='zstd', clevel=3, shuffle=1)
    check_config(codec)
    codec = Blosc(cname='lz4', clevel=1, shuffle=2, blocksize=2**8)
    check_config(codec)


def test_repr():
    expect = "Blosc(cname='zstd', clevel=3, shuffle=SHUFFLE, blocksize=0)"
    actual = repr(Blosc(cname='zstd', clevel=3, shuffle=Blosc.SHUFFLE, blocksize=0))
    assert expect == actual
    expect = "Blosc(cname='lz4', clevel=1, shuffle=NOSHUFFLE, blocksize=256)"
    actual = repr(Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE, blocksize=256))
    assert expect == actual
    expect = "Blosc(cname='zlib', clevel=9, shuffle=BITSHUFFLE, blocksize=512)"
    actual = repr(Blosc(cname='zlib', clevel=9, shuffle=Blosc.BITSHUFFLE, blocksize=512))
    assert expect == actual
    expect = "Blosc(cname='blosclz', clevel=5, shuffle=AUTOSHUFFLE, blocksize=1024)"
    actual = repr(Blosc(cname='blosclz', clevel=5, shuffle=Blosc.AUTOSHUFFLE, blocksize=1024))
    assert expect == actual


def test_eq():
    assert Blosc() == Blosc()
    assert Blosc(cname='lz4') != Blosc(cname='zstd')
    assert Blosc(clevel=1) != Blosc(clevel=9)
    assert Blosc(cname='lz4') != 'foo'


def test_compress_blocksize_default(use_threads):
    arr = np.arange(1000, dtype='i4')

    blosc.use_threads = use_threads

    # default blocksize
    enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE)
    _, _, blocksize = blosc._cbuffer_sizes(enc)
    assert blocksize > 0

    # explicit default blocksize
    enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE, 0)
    _, _, blocksize = blosc._cbuffer_sizes(enc)
    assert blocksize > 0


@pytest.mark.parametrize('bs', [2**7, 2**8])
def test_compress_blocksize(use_threads, bs):
    arr = np.arange(1000, dtype='i4')

    blosc.use_threads = use_threads

    enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE, bs)
    _, _, blocksize = blosc._cbuffer_sizes(enc)
    assert blocksize == bs


def test_compress_complib(use_threads):
    arr = np.arange(1000, dtype='i4')
    expected_complibs = {
        'lz4': 'LZ4',
        'lz4hc': 'LZ4',
        'blosclz': 'BloscLZ',
        'zlib': 'Zlib',
        'zstd': 'Zstd',
    }
    blosc.use_threads = use_threads
    for cname in blosc.list_compressors():
        enc = blosc.compress(arr, cname.encode(), 1, Blosc.NOSHUFFLE)
        complib = blosc.cbuffer_complib(enc)
        expected_complib = expected_complibs[cname]
        assert complib == expected_complib
    with pytest.raises(ValueError):
        # capitalized cname
        blosc.compress(arr, b'LZ4', 1)
    with pytest.raises(ValueError):
        # bad cname
        blosc.compress(arr, b'foo', 1)


@pytest.mark.parametrize('dtype', ['i1', 'i2', 'i4', 'i8'])
def test_compress_metainfo(dtype, use_threads):
    arr = np.arange(1000, dtype=dtype)
    for shuffle in Blosc.NOSHUFFLE, Blosc.SHUFFLE, Blosc.BITSHUFFLE:
        blosc.use_threads = use_threads
        for cname in blosc.list_compressors():
            enc = blosc.compress(arr, cname.encode(), 1, shuffle)
            typesize, did_shuffle, _ = blosc._cbuffer_metainfo(enc)
            assert typesize == arr.dtype.itemsize
            assert did_shuffle == shuffle


def test_compress_autoshuffle(use_threads):
    arr = np.arange(8000)
    for dtype in 'i1', 'i2', 'i4', 'i8', 'f2', 'f4', 'f8', 'bool', 'S10':
        varr = arr.view(dtype)
        blosc.use_threads = use_threads
        for cname in blosc.list_compressors():
            enc = blosc.compress(varr, cname.encode(), 1, Blosc.AUTOSHUFFLE)
            typesize, did_shuffle, _ = blosc._cbuffer_metainfo(enc)
            assert typesize == varr.dtype.itemsize
            # AUTOSHUFFLE picks BITSHUFFLE for 1-byte items, SHUFFLE otherwise
            if typesize == 1:
                assert did_shuffle == Blosc.BITSHUFFLE
            else:
                assert did_shuffle == Blosc.SHUFFLE


def test_config_blocksize():
    # N.B., we want to be backwards compatible with any config where blocksize is not
    # explicitly stated

    # blocksize not stated
    config = {"cname": 'lz4', "clevel": 1, "shuffle": Blosc.SHUFFLE}
    codec = Blosc.from_config(config)
    assert codec.blocksize == 0

    # blocksize stated
    config = {"cname": 'lz4', "clevel": 1, "shuffle": Blosc.SHUFFLE, "blocksize": 2**8}
    codec = Blosc.from_config(config)
    assert codec.blocksize == 2**8


def test_backwards_compatibility():
    check_backwards_compatibility(Blosc.codec_id, arrays, codecs)


def _encode_worker(data):
    """Worker used by the multiprocessing test; builds its own codec so it is
    safe to run in a forked process."""
    compressor = Blosc(cname='zlib', clevel=9, shuffle=Blosc.SHUFFLE)
    return compressor.encode(data)


def _decode_worker(enc):
    """Decoding counterpart of :func:`_encode_worker`."""
    compressor = Blosc()
    return compressor.decode(enc)


@pytest.mark.parametrize('pool', [Pool, ThreadPool])
def test_multiprocessing(use_threads, pool):
    data = np.arange(1000000)
    enc = _encode_worker(data)

    pool = pool(5)

    try:
        blosc.use_threads = use_threads

        # test with process pool and thread pool

        # test encoding
        enc_results = pool.map(_encode_worker, [data] * 5)
        assert all(len(enc) == len(e) for e in enc_results)

        # test decoding
        dec_results = pool.map(_decode_worker, [enc] * 5)
        assert all(data.nbytes == len(d) for d in dec_results)

        # tidy up
        pool.close()
        pool.join()

    finally:
        blosc.use_threads = None  # restore default


def test_err_decode_object_buffer():
    check_err_decode_object_buffer(Blosc())


def test_err_encode_object_buffer():
    check_err_encode_object_buffer(Blosc())


def test_decompression_error_handling():
    for codec in codecs:
        # NOTE(review): pytest.skip aborts the whole test at the first None
        # entry, so codecs listed after the placeholder are not reached here.
        _skip_null(codec)
        with pytest.raises(RuntimeError):
            codec.decode(bytearray())
        with pytest.raises(RuntimeError):
            codec.decode(bytearray(0))


def test_max_buffer_size():
    for codec in codecs:
        _skip_null(codec)
        assert codec.max_buffer_size == 2**31 - 1
        check_max_buffer_size(codec)


def test_typesize_explicit():
    arr = np.arange(100).astype("int64")
    itemsize = arr.itemsize
    codec_no_type_size = Blosc(shuffle=Blosc.SHUFFLE)
    codec_itemsize = Blosc(shuffle=Blosc.SHUFFLE, typesize=itemsize)
    encoded_without_itemsize = codec_no_type_size.encode(arr.tobytes())
    encoded_with_itemsize = codec_itemsize.encode(arr.tobytes())
    # third byte encodes the `typesize`
    assert encoded_without_itemsize[3] == 1  # inferred from bytes i.e., 1
    assert encoded_with_itemsize[3] == itemsize  # given as a constructor argument


def test_typesize_less_than_1():
    with pytest.raises(ValueError, match=r"Cannot use typesize"):
        Blosc(shuffle=Blosc.SHUFFLE, typesize=0)
    compressor = Blosc(shuffle=Blosc.SHUFFLE)
    # not really something that should be done in practice, but good for testing.
    compressor.typesize = 0
    arr = np.arange(100)
    with pytest.raises(ValueError, match=r"Cannot use typesize"):
        compressor.encode(arr.tobytes())
"""Tests for the BZ2 codec (``numcodecs.bz2``)."""

import itertools

import numpy as np

from numcodecs.bz2 import BZ2
from numcodecs.tests.common import (
    check_backwards_compatibility,
    check_config,
    check_encode_decode,
    check_err_decode_object_buffer,
    check_err_encode_object_buffer,
    check_repr,
)

# Compression levels spanning the valid 1-9 range plus the default.
codecs = [
    BZ2(),
    BZ2(level=1),
    BZ2(level=5),
    BZ2(level=9),
]


# mix of dtypes: integer, float, bool, string
# mix of shapes: 1D, 2D, 3D
# mix of orders: C, F
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8'),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]'),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]'),
]


def test_encode_decode():
    # round-trip every array through every codec configuration
    for arr, codec in itertools.product(arrays, codecs):
        check_encode_decode(arr, codec)


def test_config():
    codec = BZ2(level=3)
    check_config(codec)


def test_repr():
    check_repr("BZ2(level=3)")


def test_backwards_compatibility():
    check_backwards_compatibility(BZ2.codec_id, arrays, codecs)


def test_err_decode_object_buffer():
    check_err_decode_object_buffer(BZ2())


def test_err_encode_object_buffer():
    check_err_encode_object_buffer(BZ2())
"""Tests for the Categorize codec (``numcodecs.categorize``).

Labels deliberately use non-ASCII characters to exercise unicode handling;
they must be preserved exactly, including in the expected ``repr`` strings.
"""

import numpy as np
import pytest
from numpy.testing import assert_array_equal

from numcodecs.categorize import Categorize
from numcodecs.tests.common import (
    check_backwards_compatibility,
    check_config,
    check_encode_decode,
    check_encode_decode_array,
)

labels = ['ƒöõ', 'ßàř', 'ßāẑ', 'ƪùüx']
arrays = [
    np.random.choice(labels, size=1000),
    np.random.choice(labels, size=(100, 10)),
    np.random.choice(labels, size=(10, 10, 10)),
    np.random.choice(labels, size=1000).reshape(100, 10, order='F'),
]
# same data viewed as object arrays, which take a different decode path
arrays_object = [a.astype(object) for a in arrays]


def test_encode_decode():
    # unicode dtype
    for arr in arrays:
        codec = Categorize(labels, dtype=arr.dtype)
        check_encode_decode(arr, codec)

    # object dtype
    for arr in arrays_object:
        codec = Categorize(labels, dtype=arr.dtype)
        check_encode_decode_array(arr, codec)


def test_encode():
    for dtype in 'U', object:
        arr = np.array(['ƒöõ', 'ßàř', 'ƒöõ', 'ßāẑ', 'ƪùüx'], dtype=dtype)
        # miss off quux
        codec = Categorize(labels=labels[:-1], dtype=arr.dtype, astype='u1')

        # test encoding: unknown label maps to 0, known labels to 1-based index
        expect = np.array([1, 2, 1, 3, 0], dtype='u1')
        enc = codec.encode(arr)
        assert_array_equal(expect, enc)
        assert expect.dtype == enc.dtype

        # test decoding with unexpected value
        dec = codec.decode(enc)
        expect = arr.copy()
        expect[expect == 'ƪùüx'] = ''
        assert_array_equal(expect, dec)
        assert arr.dtype == dec.dtype


def test_config():
    codec = Categorize(labels=labels, dtype='U4')
    check_config(codec)


def test_repr():
    # repr truncates the label list after three entries
    dtype = '<U3'
    astype = '|u1'
    codec = Categorize(labels=['foo', 'bar', 'baz', 'qux'], dtype=dtype, astype=astype)
    expect = "Categorize(dtype='<U3', astype='|u1', labels=['foo', 'bar', 'baz', ...])"
    actual = repr(codec)
    assert expect == actual

    dtype = '<U4'
    astype = '|u1'
    codec = Categorize(labels=labels, dtype=dtype, astype=astype)
    expect = "Categorize(dtype='<U4', astype='|u1', labels=['ƒöõ', 'ßàř', 'ßāẑ', ...])"
    actual = repr(codec)
    assert expect == actual


def test_backwards_compatibility():
    codec = Categorize(labels=labels, dtype='<U4', astype='u1')
    check_backwards_compatibility(Categorize.codec_id, arrays, [codec], prefix='U')
    codec = Categorize(labels=labels, dtype=object, astype='u1')
    check_backwards_compatibility(Categorize.codec_id, arrays_object, [codec], prefix='O')


def test_errors():
    # bytes dtype is rejected
    with pytest.raises(TypeError):
        Categorize(labels=['foo', 'bar'], dtype='S6')
    # object astype is rejected
    with pytest.raises(TypeError):
        Categorize(labels=['foo', 'bar'], dtype='U6', astype=object)
"""Tests for the 32-bit checksum codecs (``numcodecs.checksum32``).

CRC32 and Adler32 are always available; CRC32C requires the optional
``crc32c`` package, so it is imported best-effort and gated by
``has_crc32c`` throughout.
"""

import itertools
from contextlib import suppress

import numpy as np
import pytest

from numcodecs.checksum32 import CRC32, Adler32
from numcodecs.tests.common import (
    check_backwards_compatibility,
    check_config,
    check_encode_decode,
    check_err_decode_object_buffer,
    check_err_encode_object_buffer,
    check_repr,
)

has_crc32c = False
with suppress(ImportError):
    from numcodecs.checksum32 import CRC32C

    has_crc32c = True

# mix of dtypes: integer, float, bool, string
# mix of shapes: 1D, 2D, 3D
# mix of orders: C, F
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8'),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]'),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]'),
]

# both checksum placements ("start" is the default) for each codec
codecs = [
    CRC32(),
    CRC32(location="end"),
    Adler32(),
    Adler32(location="end"),
]
if has_crc32c:
    codecs.extend(
        [
            CRC32C(location="start"),
            CRC32C(),
        ]
    )


@pytest.mark.parametrize(("codec", "arr"), itertools.product(codecs, arrays))
def test_encode_decode(codec, arr):
    check_encode_decode(arr, codec)


@pytest.mark.parametrize(("codec", "arr"), itertools.product(codecs, arrays))
def test_errors(codec, arr):
    # truncating the encoded buffer must be detected on decode
    enc = codec.encode(arr)
    with pytest.raises(RuntimeError):
        codec.decode(enc[:-1])


@pytest.mark.parametrize("codec", codecs)
def test_config(codec):
    check_config(codec)


@pytest.mark.parametrize("codec", codecs)
def test_err_input_too_small(codec):
    buf = b'000'  # 3 bytes are too little for a 32-bit checksum
    with pytest.raises(ValueError):
        codec.decode(buf)


@pytest.mark.parametrize("codec", codecs)
def test_err_encode_non_contiguous(codec):
    # non-contiguous memory
    arr = np.arange(1000, dtype='i4')[::2]
    with pytest.raises(ValueError):
        codec.encode(arr)


@pytest.mark.parametrize("codec", codecs)
def test_err_encode_list(codec):
    data = ['foo', 'bar', 'baz']
    with pytest.raises(TypeError):
        codec.encode(data)


def test_err_location():
    # only "start"/"end" are valid checksum locations
    with pytest.raises(ValueError):
        CRC32(location="foo")
    with pytest.raises(ValueError):
        Adler32(location="foo")
    if has_crc32c:
        with pytest.raises(ValueError):
            CRC32C(location="foo")


def test_repr():
    check_repr("CRC32(location='start')")
    check_repr("CRC32(location='end')")
    check_repr("Adler32(location='start')")
    check_repr("Adler32(location='end')")
    if has_crc32c:
        check_repr("CRC32C(location='start')")
        check_repr("CRC32C(location='end')")


def test_backwards_compatibility():
    check_backwards_compatibility(CRC32.codec_id, arrays, [CRC32()])
    check_backwards_compatibility(Adler32.codec_id, arrays, [Adler32()])
    if has_crc32c:
        check_backwards_compatibility(CRC32C.codec_id, arrays, [CRC32C()])


@pytest.mark.parametrize("codec", codecs)
def test_err_encode_object_buffer(codec):
    check_err_encode_object_buffer(codec)


@pytest.mark.parametrize("codec", codecs)
def test_err_decode_object_buffer(codec):
    check_err_decode_object_buffer(codec)


@pytest.mark.parametrize("codec", codecs)
def test_err_out_too_small(codec):
    arr = np.arange(10, dtype='i4')
    out = np.empty_like(arr)[:-1]
    with pytest.raises(ValueError):
        codec.decode(codec.encode(arr), out)


@pytest.mark.skipif(not has_crc32c, reason="Needs `crc32c` installed")
def test_crc32c_checksum():
    # known-answer test: checksum of bytes 0..63 appended little-endian
    arr = np.arange(0, 64, dtype="uint8")
    buf = CRC32C(location="end").encode(arr)
    assert np.frombuffer(buf, dtype="<u4", offset=(len(buf) - 4))[0] == np.uint32(4218238699)


@pytest.mark.parametrize("codec", codecs)
def test_err_checksum(codec):
    arr = np.arange(0, 64, dtype="uint8")
    buf = bytearray(codec.encode(arr))
    buf[-1] = 0  # corrupt the checksum
    with pytest.raises(RuntimeError):
        codec.decode(buf)
"""Tests for buffer-compatibility helpers (``numcodecs.compat``)."""

import array
import mmap

import numpy as np
import pytest

from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray, ensure_text


def test_ensure_text():
    bufs = [
        b'adsdasdas',
        'adsdasdas',
        np.asarray(memoryview(b'adsdasdas')),
        array.array('B', b'qwertyuiqwertyui'),
    ]
    for buf in bufs:
        b = ensure_text(buf)
        assert isinstance(b, str)


def test_ensure_bytes():
    bufs = [
        b'adsdasdas',
        bytes(20),
        np.arange(100),
        array.array('l', b'qwertyuiqwertyui'),
    ]
    for buf in bufs:
        b = ensure_bytes(buf)
        assert isinstance(b, bytes)


def test_ensure_contiguous_ndarray_shares_memory():
    # (expected dtype kind, expected itemsize, input buffer) triples
    typed_bufs = [
        ('u', 1, b'adsdasdas'),
        ('u', 1, bytes(20)),
        ('i', 8, np.arange(100, dtype=np.int64)),
        ('f', 8, np.linspace(0, 1, 100, dtype=np.float64)),
        ('i', 4, array.array('i', b'qwertyuiqwertyui')),
        ('u', 4, array.array('I', b'qwertyuiqwertyui')),
        ('f', 4, array.array('f', b'qwertyuiqwertyui')),
        ('f', 8, array.array('d', b'qwertyuiqwertyui')),
        ('i', 1, array.array('b', b'qwertyuiqwertyui')),
        ('u', 1, array.array('B', b'qwertyuiqwertyui')),
        ('u', 1, mmap.mmap(-1, 10)),
    ]
    for expected_kind, expected_itemsize, buf in typed_bufs:
        a = ensure_contiguous_ndarray(buf)
        assert isinstance(a, np.ndarray)
        assert expected_kind == a.dtype.kind
        if isinstance(buf, array.array):
            # array.array itemsize is platform-dependent for some typecodes
            assert buf.itemsize == a.dtype.itemsize
        else:
            assert expected_itemsize == a.dtype.itemsize
        # result must be a zero-copy view of the input buffer
        assert np.shares_memory(a, memoryview(buf))


def test_ensure_bytes_invalid_inputs():
    # object array not allowed
    a = np.array(['Xin chào thế giới'], dtype=object)
    for e in (a, memoryview(a)):
        with pytest.raises(TypeError):
            ensure_bytes(e)


@pytest.mark.filterwarnings(
    "ignore:The 'u' type code is deprecated and will be removed in Python 3.16"
)
def test_ensure_contiguous_ndarray_invalid_inputs():
    # object array not allowed
    a = np.array(['Xin chào thế giới'], dtype=object)
    for e in (a, memoryview(a)):
        with pytest.raises(TypeError):
            ensure_contiguous_ndarray(e)

    # non-contiguous arrays not allowed
    with pytest.raises(ValueError):
        ensure_contiguous_ndarray(np.arange(100)[::2])

    # unicode array.array not allowed
    a = array.array('u', 'qwertyuiqwertyui')
    with pytest.raises(TypeError):
        ensure_contiguous_ndarray(a)


def test_ensure_contiguous_ndarray_writeable():
    # check that the writeability of the underlying buffer is preserved
    for writeable in (False, True):
        a = np.arange(100)
        a.setflags(write=writeable)
        m = ensure_contiguous_ndarray(a)
        assert m.flags.writeable == writeable
        m = ensure_contiguous_ndarray(memoryview(a))
        assert m.flags.writeable == writeable


def test_ensure_contiguous_ndarray_max_buffer_size():
    for max_buffer_size in (4, 64, 1024):
        # buffers at or under the limit are accepted
        ensure_contiguous_ndarray(np.zeros(max_buffer_size - 1, dtype=np.int8), max_buffer_size)
        ensure_contiguous_ndarray(np.zeros(max_buffer_size, dtype=np.int8), max_buffer_size)
        # anything over the limit (in bytes, not elements) is rejected
        buffers = [
            bytes(b"x" * (max_buffer_size + 1)),
            np.zeros(max_buffer_size + 1, dtype=np.int8),
            np.zeros(max_buffer_size + 2, dtype=np.int8),
            np.zeros(max_buffer_size, dtype=np.int16),
            np.zeros(max_buffer_size, dtype=np.int32),
        ]
        for buf in buffers:
            with pytest.raises(ValueError):
                ensure_contiguous_ndarray(buf, max_buffer_size=max_buffer_size)