numcodecs 0.16.4__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87):
  1. numcodecs/__init__.py +146 -0
  2. numcodecs/_shuffle.cpython-313-darwin.so +0 -0
  3. numcodecs/abc.py +126 -0
  4. numcodecs/astype.py +72 -0
  5. numcodecs/base64.py +26 -0
  6. numcodecs/bitround.py +80 -0
  7. numcodecs/blosc.cpython-313-darwin.so +0 -0
  8. numcodecs/bz2.py +45 -0
  9. numcodecs/categorize.py +98 -0
  10. numcodecs/checksum32.py +189 -0
  11. numcodecs/compat.py +206 -0
  12. numcodecs/compat_ext.cpython-313-darwin.so +0 -0
  13. numcodecs/delta.py +94 -0
  14. numcodecs/errors.py +26 -0
  15. numcodecs/fixedscaleoffset.py +130 -0
  16. numcodecs/fletcher32.cpython-313-darwin.so +0 -0
  17. numcodecs/gzip.py +50 -0
  18. numcodecs/jenkins.cpython-313-darwin.so +0 -0
  19. numcodecs/json.py +107 -0
  20. numcodecs/lz4.cpython-313-darwin.so +0 -0
  21. numcodecs/lzma.py +71 -0
  22. numcodecs/msgpacks.py +86 -0
  23. numcodecs/ndarray_like.py +65 -0
  24. numcodecs/packbits.py +82 -0
  25. numcodecs/pcodec.py +119 -0
  26. numcodecs/pickles.py +55 -0
  27. numcodecs/quantize.py +98 -0
  28. numcodecs/registry.py +74 -0
  29. numcodecs/shuffle.py +61 -0
  30. numcodecs/tests/__init__.py +3 -0
  31. numcodecs/tests/common.py +275 -0
  32. numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
  33. numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
  34. numcodecs/tests/test_astype.py +74 -0
  35. numcodecs/tests/test_base64.py +81 -0
  36. numcodecs/tests/test_bitround.py +81 -0
  37. numcodecs/tests/test_blosc.py +290 -0
  38. numcodecs/tests/test_bz2.py +66 -0
  39. numcodecs/tests/test_categorize.py +87 -0
  40. numcodecs/tests/test_checksum32.py +199 -0
  41. numcodecs/tests/test_compat.py +111 -0
  42. numcodecs/tests/test_delta.py +61 -0
  43. numcodecs/tests/test_entrypoints.py +24 -0
  44. numcodecs/tests/test_entrypoints_backport.py +36 -0
  45. numcodecs/tests/test_fixedscaleoffset.py +77 -0
  46. numcodecs/tests/test_fletcher32.py +56 -0
  47. numcodecs/tests/test_gzip.py +110 -0
  48. numcodecs/tests/test_jenkins.py +150 -0
  49. numcodecs/tests/test_json.py +85 -0
  50. numcodecs/tests/test_lz4.py +83 -0
  51. numcodecs/tests/test_lzma.py +94 -0
  52. numcodecs/tests/test_msgpacks.py +126 -0
  53. numcodecs/tests/test_ndarray_like.py +48 -0
  54. numcodecs/tests/test_packbits.py +39 -0
  55. numcodecs/tests/test_pcodec.py +90 -0
  56. numcodecs/tests/test_pickles.py +61 -0
  57. numcodecs/tests/test_pyzstd.py +76 -0
  58. numcodecs/tests/test_quantize.py +76 -0
  59. numcodecs/tests/test_registry.py +43 -0
  60. numcodecs/tests/test_shuffle.py +166 -0
  61. numcodecs/tests/test_vlen_array.py +97 -0
  62. numcodecs/tests/test_vlen_bytes.py +93 -0
  63. numcodecs/tests/test_vlen_utf8.py +91 -0
  64. numcodecs/tests/test_zarr3.py +48 -0
  65. numcodecs/tests/test_zarr3_import.py +13 -0
  66. numcodecs/tests/test_zfpy.py +104 -0
  67. numcodecs/tests/test_zlib.py +94 -0
  68. numcodecs/tests/test_zstd.py +189 -0
  69. numcodecs/version.py +34 -0
  70. numcodecs/vlen.cpython-313-darwin.so +0 -0
  71. numcodecs/zarr3.py +67 -0
  72. numcodecs/zfpy.py +112 -0
  73. numcodecs/zlib.py +42 -0
  74. numcodecs/zstd.cpython-313-darwin.so +0 -0
  75. numcodecs-0.16.4.dist-info/METADATA +67 -0
  76. numcodecs-0.16.4.dist-info/RECORD +87 -0
  77. numcodecs-0.16.4.dist-info/WHEEL +6 -0
  78. numcodecs-0.16.4.dist-info/licenses/LICENSE.txt +21 -0
  79. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSE.txt +31 -0
  80. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/BITSHUFFLE.txt +21 -0
  81. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/FASTLZ.txt +20 -0
  82. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/LZ4.txt +25 -0
  83. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/SNAPPY.txt +28 -0
  84. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/STDINT.txt +29 -0
  85. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB-NG.txt +17 -0
  86. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB.txt +22 -0
  87. numcodecs-0.16.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,290 @@
1
+ from multiprocessing import Pool
2
+ from multiprocessing.pool import ThreadPool
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ try:
8
+ from numcodecs import blosc
9
+ from numcodecs.blosc import Blosc
10
+ except ImportError: # pragma: no cover
11
+ pytest.skip("numcodecs.blosc not available", allow_module_level=True)
12
+
13
+
14
+ from numcodecs.tests.common import (
15
+ check_backwards_compatibility,
16
+ check_config,
17
+ check_encode_decode,
18
+ check_err_decode_object_buffer,
19
+ check_err_encode_object_buffer,
20
+ check_max_buffer_size,
21
+ )
22
+
23
# Blosc configurations shared by the parametrized tests in this module.
# NOTE(review): the list is also handed to check_backwards_compatibility, which
# presumably depends on each entry's position -- hence the ``None`` placeholder
# below instead of deleting the retired entry. Confirm before reordering.
codecs = [
    # default compressor
    Blosc(shuffle=Blosc.SHUFFLE),
    Blosc(clevel=0, shuffle=Blosc.SHUFFLE),
    # lz4 with each named shuffle mode
    Blosc(cname='lz4', shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE),
    Blosc(cname='lz4', clevel=5, shuffle=Blosc.SHUFFLE),
    Blosc(cname='lz4', clevel=9, shuffle=Blosc.BITSHUFFLE),
    # other compressors, with the shuffle mode given as a plain integer
    Blosc(cname='zlib', clevel=1, shuffle=0),
    Blosc(cname='zstd', clevel=1, shuffle=1),
    Blosc(cname='blosclz', clevel=1, shuffle=2),
    None,  # was snappy
    # explicit block sizes
    Blosc(shuffle=Blosc.SHUFFLE, blocksize=0),
    Blosc(shuffle=Blosc.SHUFFLE, blocksize=2**8),
    Blosc(cname='lz4', clevel=1, shuffle=Blosc.NOSHUFFLE, blocksize=2**8),
]
38
+
39
+
40
# Sample inputs for the codec checks:
#   dtypes: integer, float, bool, byte string, datetime64, timedelta64
#   shapes: 1D, 2D, 3D
#   orders: C, F
arrays = [
    # 1D int32 ramp
    np.arange(1000, dtype='i4'),
    # 1D float64 values packed into a narrow range
    np.linspace(1000, 1001, 1000, dtype='f8'),
    # 2D normally distributed floats
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    # 2D booleans laid out with order='F'
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    # 3D fixed-width byte strings
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    # datetime64 / timedelta64 views over non-negative integers
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]'),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]'),
    # datetime64 / timedelta64 views over the very bottom of the int64 range
    # (the int64 minimum itself is NaT for these dtypes)
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]'),
]
58
+
59
+
60
+ def _skip_null(codec):
61
+ if codec is None:
62
+ pytest.skip("codec has been removed")
63
+
64
+
65
@pytest.fixture(scope='module', params=[True, False, None])
def use_threads(request):
    """Module-scoped fixture yielding each ``use_threads`` setting in turn.

    The fixture only hands the value over; tests assign it to
    ``blosc.use_threads`` themselves.
    """
    setting = request.param
    return setting
68
+
69
+
70
@pytest.mark.parametrize('array', arrays)
@pytest.mark.parametrize('codec', codecs)
def test_encode_decode(array, codec):
    """Run the shared encode/decode check for every (array, codec) pair."""
    # Retired codecs are kept in the list as ``None``; skip those cases.
    _skip_null(codec)
    check_encode_decode(array, codec)
75
+
76
+
77
def test_config():
    """Run the shared config check on two explicit Blosc configurations."""
    configurations = (
        {"cname": 'zstd', "clevel": 3, "shuffle": 1},
        {"cname": 'lz4', "clevel": 1, "shuffle": 2, "blocksize": 2**8},
    )
    for kwargs in configurations:
        check_config(Blosc(**kwargs))
82
+
83
+
84
def test_repr():
    """``repr`` lists cname, clevel, the shuffle mode by name, and blocksize."""
    cases = [
        (
            "Blosc(cname='zstd', clevel=3, shuffle=SHUFFLE, blocksize=0)",
            {"cname": 'zstd', "clevel": 3, "shuffle": Blosc.SHUFFLE, "blocksize": 0},
        ),
        (
            "Blosc(cname='lz4', clevel=1, shuffle=NOSHUFFLE, blocksize=256)",
            {"cname": 'lz4', "clevel": 1, "shuffle": Blosc.NOSHUFFLE, "blocksize": 256},
        ),
        (
            "Blosc(cname='zlib', clevel=9, shuffle=BITSHUFFLE, blocksize=512)",
            {"cname": 'zlib', "clevel": 9, "shuffle": Blosc.BITSHUFFLE, "blocksize": 512},
        ),
        (
            "Blosc(cname='blosclz', clevel=5, shuffle=AUTOSHUFFLE, blocksize=1024)",
            {"cname": 'blosclz', "clevel": 5, "shuffle": Blosc.AUTOSHUFFLE, "blocksize": 1024},
        ),
    ]
    for expect, kwargs in cases:
        actual = repr(Blosc(**kwargs))
        assert expect == actual
97
+
98
+
99
def test_eq():
    """Exercise ``==`` / ``!=`` between Blosc codecs and against a non-codec value."""
    # two default-constructed codecs are equal
    first_default = Blosc()
    second_default = Blosc()
    assert first_default == second_default

    # a differing compressor name or level makes codecs unequal
    lz4_codec = Blosc(cname='lz4')
    assert lz4_codec != Blosc(cname='zstd')
    assert Blosc(clevel=1) != Blosc(clevel=9)

    # comparison with an unrelated type is unequal too
    assert Blosc(cname='lz4') != 'foo'
104
+
105
+
106
def test_compress_blocksize_default(use_threads):
    """With no blocksize, or an explicit 0, the compressed buffer reports a positive blocksize."""
    arr = np.arange(1000, dtype='i4')

    # blosc.use_threads is module-level state: remember the current value and
    # put it back afterwards so this test's setting cannot leak into others.
    previous = blosc.use_threads
    blosc.use_threads = use_threads
    try:
        # default blocksize
        enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE)
        _, _, blocksize = blosc._cbuffer_sizes(enc)
        assert blocksize > 0

        # explicit default blocksize
        enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE, 0)
        _, _, blocksize = blosc._cbuffer_sizes(enc)
        assert blocksize > 0
    finally:
        blosc.use_threads = previous
120
+
121
+
122
@pytest.mark.parametrize('bs', [2**7, 2**8])
def test_compress_blocksize(use_threads, bs):
    """An explicit blocksize passed to ``blosc.compress`` is reported back unchanged."""
    arr = np.arange(1000, dtype='i4')

    # blosc.use_threads is module-level state: remember the current value and
    # put it back afterwards so this test's setting cannot leak into others.
    previous = blosc.use_threads
    blosc.use_threads = use_threads
    try:
        enc = blosc.compress(arr, b'lz4', 1, Blosc.NOSHUFFLE, bs)
        _, _, blocksize = blosc._cbuffer_sizes(enc)
        assert blocksize == bs
    finally:
        blosc.use_threads = previous
131
+
132
+
133
def test_compress_complib(use_threads):
    """Each listed compressor tags its output with the expected library name.

    Also checks that a capitalized or unknown compressor name raises
    ``ValueError``.
    """
    arr = np.arange(1000, dtype='i4')
    # compressor name -> library name expected from blosc.cbuffer_complib
    expected_complibs = {
        'lz4': 'LZ4',
        'lz4hc': 'LZ4',
        'blosclz': 'BloscLZ',
        'zlib': 'Zlib',
        'zstd': 'Zstd',
    }

    # blosc.use_threads is module-level state: remember the current value and
    # put it back afterwards so this test's setting cannot leak into others.
    previous = blosc.use_threads
    blosc.use_threads = use_threads
    try:
        for cname in blosc.list_compressors():
            enc = blosc.compress(arr, cname.encode(), 1, Blosc.NOSHUFFLE)
            complib = blosc.cbuffer_complib(enc)
            expected_complib = expected_complibs[cname]
            assert complib == expected_complib
        with pytest.raises(ValueError):
            # capitalized cname
            blosc.compress(arr, b'LZ4', 1)
        with pytest.raises(ValueError):
            # bad cname
            blosc.compress(arr, b'foo', 1)
    finally:
        blosc.use_threads = previous
154
+
155
+
156
@pytest.mark.parametrize('dtype', ['i1', 'i2', 'i4', 'i8'])
def test_compress_metainfo(dtype, use_threads):
    """The compressed buffer's metainfo records the input itemsize and the shuffle used."""
    arr = np.arange(1000, dtype=dtype)

    # blosc.use_threads is module-level state: set it once (it does not vary
    # inside the loops) and put the old value back afterwards so the setting
    # cannot leak into other tests.
    previous = blosc.use_threads
    blosc.use_threads = use_threads
    try:
        for shuffle in Blosc.NOSHUFFLE, Blosc.SHUFFLE, Blosc.BITSHUFFLE:
            for cname in blosc.list_compressors():
                enc = blosc.compress(arr, cname.encode(), 1, shuffle)
                typesize, did_shuffle, _ = blosc._cbuffer_metainfo(enc)
                assert typesize == arr.dtype.itemsize
                assert did_shuffle == shuffle
    finally:
        blosc.use_threads = previous
166
+
167
+
168
def test_compress_autoshuffle(use_threads):
    """AUTOSHUFFLE resolves to BITSHUFFLE for 1-byte items and SHUFFLE otherwise."""
    arr = np.arange(8000)

    # blosc.use_threads is module-level state: set it once (it does not vary
    # inside the loops) and put the old value back afterwards so the setting
    # cannot leak into other tests.
    previous = blosc.use_threads
    blosc.use_threads = use_threads
    try:
        for dtype in 'i1', 'i2', 'i4', 'i8', 'f2', 'f4', 'f8', 'bool', 'S10':
            # reinterpret the same buffer with a different item size
            varr = arr.view(dtype)
            for cname in blosc.list_compressors():
                enc = blosc.compress(varr, cname.encode(), 1, Blosc.AUTOSHUFFLE)
                typesize, did_shuffle, _ = blosc._cbuffer_metainfo(enc)
                assert typesize == varr.dtype.itemsize
                if typesize == 1:
                    assert did_shuffle == Blosc.BITSHUFFLE
                else:
                    assert did_shuffle == Blosc.SHUFFLE
    finally:
        blosc.use_threads = previous
181
+
182
+
183
def test_config_blocksize():
    """``from_config`` yields blocksize 0 when the key is absent, else the stated value."""
    # N.B., configs that do not state a blocksize explicitly must keep loading
    # (backwards compatibility).
    base_config = {"cname": 'lz4', "clevel": 1, "shuffle": Blosc.SHUFFLE}

    # blocksize not stated
    without_blocksize = Blosc.from_config(dict(base_config))
    assert without_blocksize.blocksize == 0

    # blocksize stated
    with_blocksize = Blosc.from_config({**base_config, "blocksize": 2**8})
    assert with_blocksize.blocksize == 2**8
196
+
197
+
198
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check over this module's arrays and codecs."""
    codec_id = Blosc.codec_id
    check_backwards_compatibility(codec_id, arrays, codecs)
200
+
201
+
202
def _encode_worker(data):
    """Encode ``data`` with a freshly built zlib/level-9/shuffle Blosc codec.

    Used as the mapped function in ``test_multiprocessing``.
    """
    return Blosc(cname='zlib', clevel=9, shuffle=Blosc.SHUFFLE).encode(data)
205
+
206
+
207
def _decode_worker(enc):
    """Decode ``enc`` with a freshly built default Blosc codec.

    Used as the mapped function in ``test_multiprocessing``.
    """
    return Blosc().decode(enc)
210
+
211
+
212
@pytest.mark.parametrize('pool', [Pool, ThreadPool])
def test_multiprocessing(use_threads, pool):
    """Encode and decode through a process pool and a thread pool.

    ``pool`` is the pool class to instantiate (``Pool`` or ``ThreadPool``);
    ``use_threads`` is assigned to ``blosc.use_threads`` for the duration of
    the test.
    """
    data = np.arange(1000000)
    enc = _encode_worker(data)

    workers = pool(5)

    try:
        blosc.use_threads = use_threads

        # test encoding
        enc_results = workers.map(_encode_worker, [data] * 5)
        assert all(len(enc) == len(e) for e in enc_results)

        # test decoding
        dec_results = workers.map(_decode_worker, [enc] * 5)
        assert all(data.nbytes == len(d) for d in dec_results)

    finally:
        blosc.use_threads = None  # restore default
        # tidy up -- done here rather than in the try body so the pool is shut
        # down even when one of the assertions above fails
        workers.close()
        workers.join()
238
+
239
+
240
def test_err_decode_object_buffer():
    """Run the shared object-buffer decode error check on a default Blosc codec."""
    default_codec = Blosc()
    check_err_decode_object_buffer(default_codec)
242
+
243
+
244
def test_err_encode_object_buffer():
    """Run the shared object-buffer encode error check on a default Blosc codec."""
    default_codec = Blosc()
    check_err_encode_object_buffer(default_codec)
246
+
247
+
248
@pytest.mark.parametrize('codec', codecs)
def test_decompression_error_handling(codec):
    """Decoding an empty buffer raises ``RuntimeError``."""
    _skip_null(codec)
    # both spellings construct an empty bytearray
    for empty_buffer in (bytearray(), bytearray(0)):
        with pytest.raises(RuntimeError):
            codec.decode(empty_buffer)
255
+
256
+
257
@pytest.mark.parametrize('codec', codecs)
def test_max_buffer_size(codec):
    """Each codec reports ``max_buffer_size == 2**31 - 1``; then run the shared size check."""
    _skip_null(codec)
    expected_limit = 2**31 - 1
    assert codec.max_buffer_size == expected_limit
    check_max_buffer_size(codec)
262
+
263
+
264
def test_typesize_explicit():
    """A ``typesize`` given to the constructor ends up in the encoded buffer.

    The input is passed as raw ``bytes``, so without an explicit ``typesize``
    the recorded item size is 1.
    """
    arr = np.arange(100).astype("int64")
    itemsize = arr.itemsize
    codec_no_type_size = Blosc(shuffle=Blosc.SHUFFLE)
    codec_itemsize = Blosc(shuffle=Blosc.SHUFFLE, typesize=itemsize)
    encoded_without_itemsize = codec_no_type_size.encode(arr.tobytes())
    encoded_with_itemsize = codec_itemsize.encode(arr.tobytes())
    # the fourth byte (index 3) of the encoded buffer holds the `typesize`
    assert encoded_without_itemsize[3] == 1  # inferred from bytes i.e., 1
    assert encoded_with_itemsize[3] == itemsize  # given as a constructor argument
274
+
275
+
276
def test_typesize_less_than_1():
    """A typesize of 0 is rejected by the constructor and again by ``encode``."""
    message = r"Cannot use typesize"

    # rejected at construction time
    with pytest.raises(ValueError, match=message):
        Blosc(shuffle=Blosc.SHUFFLE, typesize=0)

    # Poke the private attribute to get past the constructor; not really
    # something that should be done in practice, but good for testing.
    compressor = Blosc(shuffle=Blosc.SHUFFLE)
    compressor._typesize = 0
    values = np.arange(100)
    with pytest.raises(ValueError, match=message):
        compressor.encode(values.tobytes())
285
+
286
+
287
def test_config_no_typesize():
    """``get_config`` does not include ``typesize`` even when one was given."""
    exported = Blosc(shuffle=Blosc.SHUFFLE, typesize=5).get_config()
    assert "typesize" not in exported
@@ -0,0 +1,66 @@
1
+ import itertools
2
+
3
+ import numpy as np
4
+
5
+ from numcodecs.bz2 import BZ2
6
+ from numcodecs.tests.common import (
7
+ check_backwards_compatibility,
8
+ check_config,
9
+ check_encode_decode,
10
+ check_err_decode_object_buffer,
11
+ check_err_encode_object_buffer,
12
+ check_repr,
13
+ )
14
+
15
# BZ2 codecs under test: the default configuration plus three explicit levels.
codecs = [BZ2(), *(BZ2(level=level) for level in (1, 5, 9))]
21
+
22
+
23
# Sample inputs for the codec checks:
#   dtypes: integer, float, bool, byte string, datetime64, timedelta64
#   shapes: 1D, 2D, 3D
#   orders: C, F
arrays = [
    # 1D int32 ramp
    np.arange(1000, dtype='i4'),
    # 1D float64 values packed into a narrow range
    np.linspace(1000, 1001, 1000, dtype='f8'),
    # 2D normally distributed floats
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    # 2D booleans laid out with order='F'
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    # 3D fixed-width byte strings
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    # datetime64 / timedelta64 views over non-negative integers
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]'),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]'),
    # datetime64 / timedelta64 views over the very bottom of the int64 range
    # (the int64 minimum itself is NaT for these dtypes)
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]'),
]
41
+
42
+
43
def test_encode_decode():
    """Run the shared encode/decode check for every (array, codec) combination."""
    for pairing in itertools.product(arrays, codecs):
        check_encode_decode(*pairing)
46
+
47
+
48
def test_config():
    """Run the shared config check on a level-3 BZ2 codec."""
    check_config(BZ2(level=3))
51
+
52
+
53
def test_repr():
    """Run the shared repr check on the textual form of a level-3 BZ2 codec."""
    expected_repr = "BZ2(level=3)"
    check_repr(expected_repr)
55
+
56
+
57
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check over this module's arrays and codecs."""
    codec_id = BZ2.codec_id
    check_backwards_compatibility(codec_id, arrays, codecs)
59
+
60
+
61
def test_err_decode_object_buffer():
    """Run the shared object-buffer decode error check on a default BZ2 codec."""
    default_codec = BZ2()
    check_err_decode_object_buffer(default_codec)
63
+
64
+
65
def test_err_encode_object_buffer():
    """Run the shared object-buffer encode error check on a default BZ2 codec."""
    default_codec = BZ2()
    check_err_encode_object_buffer(default_codec)
@@ -0,0 +1,87 @@
1
+ import numpy as np
2
+ import pytest
3
+ from numpy.testing import assert_array_equal
4
+
5
+ from numcodecs.categorize import Categorize
6
+ from numcodecs.tests.common import (
7
+ check_backwards_compatibility,
8
+ check_config,
9
+ check_encode_decode,
10
+ check_encode_decode_array,
11
+ )
12
+
13
# Non-ASCII category labels, so the tests also cover unicode handling.
labels = ['ƒöõ', 'ßàř', 'ßāẑ', 'ƪùüx']

# Random draws from ``labels`` in 1D, 2D and 3D shapes, plus one array laid out
# with order='F'.
arrays = [
    np.random.choice(labels, size=1000),
    np.random.choice(labels, size=(100, 10)),
    np.random.choice(labels, size=(10, 10, 10)),
    np.random.choice(labels, size=1000).reshape(100, 10, order='F'),
]

# The same samples converted to object dtype.
arrays_object = [sample.astype(object) for sample in arrays]
21
+
22
+
23
def test_encode_decode():
    """Run the shared encode/decode checks on the unicode and object sample arrays."""
    suites = (
        # unicode dtype
        (arrays, check_encode_decode),
        # object dtype
        (arrays_object, check_encode_decode_array),
    )
    for samples, run_check in suites:
        for arr in samples:
            run_check(arr, Categorize(labels, dtype=arr.dtype))
33
+
34
+
35
def test_encode():
    """Known labels encode to 1, 2, 3; a value outside the label set encodes to 0.

    Decoding that 0 gives an empty string. Checked for both unicode and
    object dtype input.
    """
    for dtype in 'U', object:
        arr = np.array(['ƒöõ', 'ßàř', 'ƒöõ', 'ßāẑ', 'ƪùüx'], dtype=dtype)
        # leave the last label ('ƪùüx') out of the codec so that it shows up
        # in ``arr`` as a value the codec does not know
        codec = Categorize(labels=labels[:-1], dtype=arr.dtype, astype='u1')

        # test encoding
        expect = np.array([1, 2, 1, 3, 0], dtype='u1')
        enc = codec.encode(arr)
        assert_array_equal(expect, enc)
        assert expect.dtype == enc.dtype

        # test decoding with unexpected value: the unknown entry comes back as ''
        dec = codec.decode(enc)
        expect = arr.copy()
        expect[expect == 'ƪùüx'] = ''
        assert_array_equal(expect, dec)
        assert arr.dtype == dec.dtype
53
+
54
+
55
def test_config():
    """Run the shared config check on a 'U4' Categorize codec over the module labels."""
    check_config(Categorize(labels=labels, dtype='U4'))
58
+
59
+
60
def test_repr():
    """``repr`` shows dtype, astype and the first three labels followed by an ellipsis."""
    cases = [
        (
            ['foo', 'bar', 'baz', 'qux'],
            '<U3',
            "Categorize(dtype='<U3', astype='|u1', labels=['foo', 'bar', 'baz', ...])",
        ),
        (
            labels,
            '<U4',
            "Categorize(dtype='<U4', astype='|u1', labels=['ƒöõ', 'ßàř', 'ßāẑ', ...])",
        ),
    ]
    for case_labels, dtype, expect in cases:
        codec = Categorize(labels=case_labels, dtype=dtype, astype='|u1')
        actual = repr(codec)
        assert expect == actual
74
+
75
+
76
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the unicode and object variants."""
    variants = (
        ('<U4', arrays, 'U'),
        (object, arrays_object, 'O'),
    )
    for dtype, samples, prefix in variants:
        codec = Categorize(labels=labels, dtype=dtype, astype='u1')
        check_backwards_compatibility(Categorize.codec_id, samples, [codec], prefix=prefix)
81
+
82
+
83
def test_errors():
    """A bytes ``dtype`` or an object ``astype`` makes the constructor raise ``TypeError``."""
    rejected = (
        {"dtype": 'S6'},
        {"dtype": 'U6', "astype": object},
    )
    for kwargs in rejected:
        with pytest.raises(TypeError):
            Categorize(labels=['foo', 'bar'], **kwargs)
@@ -0,0 +1,199 @@
1
+ import itertools
2
+ from contextlib import suppress
3
+
4
+ import numpy as np
5
+ import pytest
6
+
7
+ from numcodecs.checksum32 import CRC32, Adler32
8
+ from numcodecs.tests.common import (
9
+ check_backwards_compatibility,
10
+ check_config,
11
+ check_encode_decode,
12
+ check_err_decode_object_buffer,
13
+ check_err_encode_object_buffer,
14
+ check_repr,
15
+ )
16
+
17
+ has_crc32c = False
18
+ with suppress(ImportError):
19
+ from numcodecs.checksum32 import CRC32C
20
+
21
+ has_crc32c = True
22
+
23
# Sample inputs for the codec checks:
#   dtypes: integer, float, bool, byte string, datetime64, timedelta64
#   shapes: 1D, 2D, 3D
#   orders: C, F
arrays = [
    # 1D int32 ramp
    np.arange(1000, dtype='i4'),
    # 1D float64 values packed into a narrow range
    np.linspace(1000, 1001, 1000, dtype='f8'),
    # 2D normally distributed floats
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    # 2D booleans laid out with order='F'
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    # 3D fixed-width byte strings
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    # datetime64 / timedelta64 views over non-negative integers
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('M8[ns]'),
    np.random.randint(0, 2**60, size=1000, dtype='u8').view('m8[ns]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('M8[m]'),
    np.random.randint(0, 2**25, size=1000, dtype='u8').view('m8[m]'),
    # datetime64 / timedelta64 views over the very bottom of the int64 range
    # (the int64 minimum itself is NaT for these dtypes)
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[ns]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('M8[m]'),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view('m8[m]'),
]
41
+
42
# Checksum codecs that are always available: each class once with its default
# arguments and once with the checksum location set to "end".
base_codecs = [
    codec_cls(**kwargs)
    for codec_cls in (CRC32, Adler32)
    for kwargs in ({}, {"location": "end"})
]
48
+
49
+
50
def get_all_codecs():
    """Return a new list of the base codecs, plus two CRC32C codecs when available.

    The CRC32C entries are added only if ``has_crc32c`` is true.
    """
    if not has_crc32c:
        return base_codecs.copy()
    return [*base_codecs, CRC32C(location="start"), CRC32C()]
60
+
61
+
62
@pytest.mark.parametrize(("codec", "arr"), itertools.product(get_all_codecs(), arrays))
def test_encode_decode(codec, arr):
    """Run the shared encode/decode check for every (codec, array) combination."""
    check_encode_decode(arr, codec)
65
+
66
+
67
@pytest.mark.parametrize(("codec", "arr"), itertools.product(get_all_codecs(), arrays))
def test_errors(codec, arr):
    """Decoding an encoded buffer with its final element dropped raises ``RuntimeError``."""
    encoded = codec.encode(arr)
    with pytest.raises(RuntimeError):
        codec.decode(encoded[:-1])
72
+
73
+
74
@pytest.mark.parametrize("codec", get_all_codecs())
def test_config(codec):
    """Run the shared config check on every checksum codec."""
    check_config(codec)
77
+
78
+
79
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_input_too_small(codec):
    """Decoding fewer bytes than a 32-bit checksum occupies raises ``ValueError``."""
    # 3 bytes are too little for a 32-bit checksum
    too_short = b'000'
    with pytest.raises(ValueError):
        codec.decode(too_short)
84
+
85
+
86
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_encode_non_contiguous(codec):
    """Encoding a strided (non-contiguous) array raises ``ValueError``."""
    # taking every second element leaves gaps in memory
    strided = np.arange(1000, dtype='i4')[::2]
    with pytest.raises(ValueError):
        codec.encode(strided)
92
+
93
+
94
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_encode_list(codec):
    """Encoding a plain list of strings raises ``TypeError``."""
    words = ['foo', 'bar', 'baz']
    with pytest.raises(TypeError):
        codec.encode(words)
99
+
100
+
101
def test_err_location():
    """An unrecognised ``location`` makes each checksum constructor raise ``ValueError``."""
    for codec_cls in (CRC32, Adler32):
        with pytest.raises(ValueError):
            codec_cls(location="foo")

    # CRC32C is optional; the remaining check only applies when it imported.
    if not has_crc32c:
        pytest.skip("Needs `crc32c` installed")
    with pytest.raises(ValueError):
        CRC32C(location="foo")
110
+
111
+
112
@pytest.mark.parametrize(
    "repr_str",
    [
        # always-available codecs
        "CRC32(location='start')",
        "CRC32(location='end')",
        "Adler32(location='start')",
        "Adler32(location='end')",
        # CRC32C cases are skipped when the optional import failed
        pytest.param(
            "CRC32C(location='start')",
            marks=pytest.mark.skipif(not has_crc32c, reason="Needs `crc32c` installed"),
        ),
        pytest.param(
            "CRC32C(location='end')",
            marks=pytest.mark.skipif(not has_crc32c, reason="Needs `crc32c` installed"),
        ),
    ],
)
def test_repr(repr_str):
    """Run the shared repr check on each codec's textual form."""
    check_repr(repr_str)
131
+
132
+
133
@pytest.mark.parametrize(
    ('codec_id', 'codec_instance'),
    [
        (CRC32.codec_id, CRC32()),
        (Adler32.codec_id, Adler32()),
    ],
)
def test_backwards_compatibility(codec_id, codec_instance):
    """Run the shared backwards-compatibility check for the default CRC32 and Adler32 codecs."""
    single_codec = [codec_instance]
    check_backwards_compatibility(codec_id, arrays, single_codec)
142
+
143
+
144
@pytest.mark.skipif(not has_crc32c, reason="Needs `crc32c` installed")
def test_backwards_compatibility_crc32c():
    """Run the shared backwards-compatibility check for the default CRC32C codec."""
    single_codec = [CRC32C()]
    check_backwards_compatibility(CRC32C.codec_id, arrays, single_codec)
147
+
148
+
149
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_encode_object_buffer(codec):
    """Run the shared object-buffer encode error check on every checksum codec."""
    check_err_encode_object_buffer(codec)
152
+
153
+
154
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_decode_object_buffer(codec):
    """Run the shared object-buffer decode error check on every checksum codec."""
    check_err_decode_object_buffer(codec)
157
+
158
+
159
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_out_too_small(codec):
    """Decoding into an ``out`` buffer one element too short raises ``ValueError``."""
    source = np.arange(10, dtype='i4')
    # same dtype as the source, but with the last element sliced off
    undersized_out = np.empty_like(source)[:-1]
    with pytest.raises(ValueError):
        codec.decode(codec.encode(source), undersized_out)
165
+
166
+
167
@pytest.mark.skipif(not has_crc32c, reason="Needs `crc32c` installed")
def test_crc32c_checksum():
    """The trailing little-endian uint32 of an end-located CRC32C encoding matches a known value."""
    payload = np.arange(0, 64, dtype="uint8")
    buf = CRC32C(location="end").encode(payload)
    # read the final four bytes as one little-endian unsigned 32-bit integer
    trailer = np.frombuffer(buf, dtype="<u4", offset=(len(buf) - 4))
    assert trailer[0] == np.uint32(4218238699)
172
+
173
+
174
@pytest.mark.skipif(not has_crc32c, reason="Needs `crc32c` installed")
def test_crc32c_incremental():
    """Chaining ``CRC32C.checksum`` through its value argument matches a one-shot checksum."""
    head = b"hello"
    tail = b" world"

    # one pass over the concatenated input
    one_shot = CRC32C.checksum(np.frombuffer(head + tail, dtype='uint8'))

    # two passes, feeding the first result in as the starting value of the second
    running = CRC32C.checksum(np.frombuffer(head, dtype='uint8'), 0)
    running = CRC32C.checksum(np.frombuffer(tail, dtype='uint8'), running)

    # Both methods should produce the same result
    assert one_shot == running
191
+
192
+
193
@pytest.mark.parametrize("codec", get_all_codecs())
def test_err_checksum(codec):
    """Zeroing the last byte of an encoded buffer makes ``decode`` raise ``RuntimeError``."""
    payload = np.arange(0, 64, dtype="uint8")
    tampered = bytearray(codec.encode(payload))
    # corrupt the checksum
    # NOTE(review): for codecs with location="start" the final byte is
    # presumably payload rather than checksum; either way decode must fail.
    tampered[-1] = 0
    with pytest.raises(RuntimeError):
        codec.decode(tampered)