numcodecs 0.16.0__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numcodecs might be problematic. Click here for more details.

Files changed (79) hide show
  1. numcodecs/__init__.py +146 -0
  2. numcodecs/_shuffle.cpython-312-aarch64-linux-gnu.so +0 -0
  3. numcodecs/abc.py +127 -0
  4. numcodecs/astype.py +72 -0
  5. numcodecs/base64.py +26 -0
  6. numcodecs/bitround.py +80 -0
  7. numcodecs/blosc.cpython-312-aarch64-linux-gnu.so +0 -0
  8. numcodecs/bz2.py +45 -0
  9. numcodecs/categorize.py +98 -0
  10. numcodecs/checksum32.py +183 -0
  11. numcodecs/compat.py +206 -0
  12. numcodecs/compat_ext.cpython-312-aarch64-linux-gnu.so +0 -0
  13. numcodecs/delta.py +94 -0
  14. numcodecs/errors.py +26 -0
  15. numcodecs/fixedscaleoffset.py +130 -0
  16. numcodecs/fletcher32.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numcodecs/gzip.py +50 -0
  18. numcodecs/jenkins.cpython-312-aarch64-linux-gnu.so +0 -0
  19. numcodecs/json.py +107 -0
  20. numcodecs/lz4.cpython-312-aarch64-linux-gnu.so +0 -0
  21. numcodecs/lzma.py +72 -0
  22. numcodecs/msgpacks.py +86 -0
  23. numcodecs/ndarray_like.py +65 -0
  24. numcodecs/packbits.py +82 -0
  25. numcodecs/pcodec.py +118 -0
  26. numcodecs/pickles.py +55 -0
  27. numcodecs/quantize.py +98 -0
  28. numcodecs/registry.py +74 -0
  29. numcodecs/shuffle.py +61 -0
  30. numcodecs/tests/__init__.py +3 -0
  31. numcodecs/tests/common.py +285 -0
  32. numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
  33. numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
  34. numcodecs/tests/test_astype.py +74 -0
  35. numcodecs/tests/test_base64.py +81 -0
  36. numcodecs/tests/test_bitround.py +81 -0
  37. numcodecs/tests/test_blosc.py +284 -0
  38. numcodecs/tests/test_bz2.py +66 -0
  39. numcodecs/tests/test_categorize.py +87 -0
  40. numcodecs/tests/test_checksum32.py +154 -0
  41. numcodecs/tests/test_compat.py +111 -0
  42. numcodecs/tests/test_delta.py +61 -0
  43. numcodecs/tests/test_entrypoints.py +24 -0
  44. numcodecs/tests/test_entrypoints_backport.py +36 -0
  45. numcodecs/tests/test_fixedscaleoffset.py +77 -0
  46. numcodecs/tests/test_fletcher32.py +56 -0
  47. numcodecs/tests/test_gzip.py +110 -0
  48. numcodecs/tests/test_jenkins.py +150 -0
  49. numcodecs/tests/test_json.py +85 -0
  50. numcodecs/tests/test_lz4.py +83 -0
  51. numcodecs/tests/test_lzma.py +94 -0
  52. numcodecs/tests/test_msgpacks.py +126 -0
  53. numcodecs/tests/test_ndarray_like.py +48 -0
  54. numcodecs/tests/test_packbits.py +39 -0
  55. numcodecs/tests/test_pcodec.py +90 -0
  56. numcodecs/tests/test_pickles.py +61 -0
  57. numcodecs/tests/test_quantize.py +76 -0
  58. numcodecs/tests/test_registry.py +43 -0
  59. numcodecs/tests/test_shuffle.py +166 -0
  60. numcodecs/tests/test_vlen_array.py +97 -0
  61. numcodecs/tests/test_vlen_bytes.py +97 -0
  62. numcodecs/tests/test_vlen_utf8.py +91 -0
  63. numcodecs/tests/test_zarr3.py +279 -0
  64. numcodecs/tests/test_zarr3_import.py +13 -0
  65. numcodecs/tests/test_zfpy.py +104 -0
  66. numcodecs/tests/test_zlib.py +94 -0
  67. numcodecs/tests/test_zstd.py +92 -0
  68. numcodecs/version.py +21 -0
  69. numcodecs/vlen.cpython-312-aarch64-linux-gnu.so +0 -0
  70. numcodecs/zarr3.py +401 -0
  71. numcodecs/zfpy.py +113 -0
  72. numcodecs/zlib.py +42 -0
  73. numcodecs/zstd.cpython-312-aarch64-linux-gnu.so +0 -0
  74. numcodecs-0.16.0.dist-info/METADATA +66 -0
  75. numcodecs-0.16.0.dist-info/RECORD +79 -0
  76. numcodecs-0.16.0.dist-info/WHEEL +6 -0
  77. numcodecs-0.16.0.dist-info/entry_points.txt +22 -0
  78. numcodecs-0.16.0.dist-info/licenses/LICENSE.txt +21 -0
  79. numcodecs-0.16.0.dist-info/top_level.txt +1 -0
numcodecs/registry.py ADDED
@@ -0,0 +1,74 @@
1
+ """The registry module provides some simple convenience functions to enable
2
+ applications to dynamically register and look-up codec classes."""
3
+
4
+ import logging
5
+ from importlib.metadata import EntryPoints, entry_points
6
+
7
+ from numcodecs.abc import Codec
8
+ from numcodecs.errors import UnknownCodecError
9
+
10
# Logger shared by the registry helpers below.
logger = logging.getLogger("numcodecs")
# Maps a codec identifier to the codec *class* registered for it:
# register_codec() stores classes here and get_codec() calls
# ``cls.from_config`` on the stored value.
codec_registry: dict[str, type[Codec]] = {}
# Maps an entry-point name to the entry point that get_codec() later ``.load()``s.
# NOTE(review): run_entrypoints() stores individual entry points as values, so
# ``EntryPoint`` looks like the more precise value type -- confirm before changing.
entries: dict[str, EntryPoints] = {}
13
+
14
+
15
def run_entrypoints():
    """Refresh ``entries`` from the ``numcodecs.codecs`` entry-point group.

    The module-level ``entries`` mapping is emptied and then refilled in
    place, keyed by entry-point name.
    """
    entries.clear()
    discovered = entry_points().select(group="numcodecs.codecs")
    entries.update({entry.name: entry for entry in discovered})
19
+
20
+
21
+ run_entrypoints()
22
+
23
+
24
def get_codec(config):
    """Build a codec instance from a configuration mapping.

    Parameters
    ----------
    config : dict-like
        Configuration object. Its ``'id'`` item selects the codec class; the
        remaining items are handed to that class's ``from_config``.

    Returns
    -------
    codec : Codec

    Raises
    ------
    UnknownCodecError
        If no class is registered for the id and no entry point provides one.

    Examples
    --------

    >>> import numcodecs as codecs
    >>> codec = codecs.get_codec(dict(id='zlib', level=1))
    >>> codec
    Zlib(level=1)

    """
    # Work on a copy so the caller's mapping keeps its 'id' item.
    params = dict(config)
    codec_id = params.pop('id', None)

    cls = codec_registry.get(codec_id)
    if cls is None and codec_id in entries:
        # Not registered yet, but an entry point advertises it: load it on
        # demand and remember the class for subsequent lookups.
        logger.debug("Auto loading codec '%s' from entrypoint", codec_id)
        cls = entries[codec_id].load()
        register_codec(cls, codec_id=codec_id)

    if not cls:
        raise UnknownCodecError(f"{codec_id!r}")
    return cls.from_config(params)
55
+
56
+
57
def register_codec(cls, codec_id=None):
    """Register a codec class.

    Parameters
    ----------
    cls : Codec class
    codec_id : optional
        Identifier to register the class under. When omitted, the class's
        own ``codec_id`` attribute is used.

    Notes
    -----
    This function maintains a mapping from codec identifiers to codec
    classes. When a codec class is registered, it will replace any class
    previously registered under the same codec identifier, if present.

    """
    key = cls.codec_id if codec_id is None else codec_id
    logger.debug("Registering codec '%s'", key)
    codec_registry[key] = cls
numcodecs/shuffle.py ADDED
@@ -0,0 +1,61 @@
1
+ import numpy as np
2
+
3
+ from ._shuffle import _doShuffle, _doUnshuffle
4
+ from .abc import Codec
5
+ from .compat import ensure_contiguous_ndarray
6
+
7
+
8
class Shuffle(Codec):
    """Codec providing shuffle

    Parameters
    ----------
    elementsize : int
        Size in bytes of the array elements.  Default = 4

    """

    codec_id = 'shuffle'

    def __init__(self, elementsize=4):
        self.elementsize = elementsize

    def _prepare_arrays(self, buf, out):
        """Return ``(buf, out)`` as contiguous arrays ready for (un)shuffling.

        A zero-filled ``uint8`` array of ``buf.nbytes`` bytes is allocated
        when ``out`` is None. For ``elementsize <= 1`` the input is copied
        straight into ``out``; otherwise the input size is validated.

        Raises
        ------
        ValueError
            If ``elementsize > 1`` and ``buf.nbytes`` is not a multiple of it.
        """
        buf = ensure_contiguous_ndarray(buf)
        out = (
            np.zeros(buf.nbytes, dtype='uint8')
            if out is None
            else ensure_contiguous_ndarray(out)
        )

        if self.elementsize <= 1:
            # no shuffling needed: plain copy into the output buffer
            out.view(buf.dtype)[: len(buf)] = buf[:]
        elif buf.nbytes % self.elementsize != 0:
            raise ValueError("Shuffle buffer is not an integer multiple of elementsize")

        return buf, out

    def encode(self, buf, out=None):
        """Shuffle ``buf`` into ``out`` (allocated when None) and return ``out``."""
        buf, out = self._prepare_arrays(buf, out)

        if self.elementsize > 1:
            _doShuffle(buf.view("uint8"), out.view("uint8"), self.elementsize)
        # otherwise _prepare_arrays already copied the data unchanged

        return out

    def decode(self, buf, out=None):
        """Unshuffle ``buf`` into ``out`` (allocated when None) and return ``out``."""
        buf, out = self._prepare_arrays(buf, out)

        if self.elementsize > 1:
            _doUnshuffle(buf.view("uint8"), out.view("uint8"), self.elementsize)
        # otherwise _prepare_arrays already copied the data unchanged

        return out

    def __repr__(self):
        cls_name = type(self).__name__
        return f'{cls_name}(elementsize={self.elementsize})'
@@ -0,0 +1,3 @@
1
+ import pytest
2
+
3
+ pytest.register_assert_rewrite('numcodecs.tests.common')
@@ -0,0 +1,285 @@
1
+ import array
2
+ import json as _json
3
+ import os
4
+ from glob import glob
5
+
6
+ import numpy as np
7
+ import pytest
8
+ from numpy.testing import assert_array_almost_equal, assert_array_equal
9
+
10
+ from numcodecs import * # noqa: F403 # for eval to find names in repr tests
11
+ from numcodecs.compat import ensure_bytes, ensure_ndarray
12
+ from numcodecs.registry import get_codec
13
+
14
+ greetings = [
15
+ '¡Hola mundo!',
16
+ 'Hej Världen!',
17
+ 'Servus Woid!',
18
+ 'Hei maailma!',
19
+ 'Xin chào thế giới',
20
+ 'Njatjeta Botë!',
21
+ 'Γεια σου κόσμε!', # noqa: RUF001
22
+ 'こんにちは世界',
23
+ '世界,你好!', # noqa: RUF001
24
+ 'Helló, világ!',
25
+ 'Zdravo svete!',
26
+ 'เฮลโลเวิลด์',
27
+ ]
28
+
29
+
30
def compare_arrays(arr, res, precision=None):
    """Assert that ``res`` holds the same data as the reference array ``arr``.

    ``res`` is converted to an ndarray, viewed with ``arr``'s dtype and
    reshaped to ``arr``'s shape (Fortran order when ``arr`` is F-contiguous,
    C order otherwise). The comparison is exact when ``precision`` is None,
    otherwise approximate to ``precision`` decimals.
    """
    # ensure numpy array with matching dtype
    candidate = ensure_ndarray(res).view(arr.dtype)

    # convert to correct shape
    layout = 'F' if arr.flags.f_contiguous else 'C'
    candidate = candidate.reshape(arr.shape, order=layout)

    if precision is not None:
        # fuzzy compare
        assert_array_almost_equal(arr, candidate, decimal=precision)
        return

    # exact compare
    assert_array_equal(arr, candidate)
48
+
49
+
50
def check_encode_decode(arr, codec, precision=None):
    """Round-trip ``arr`` through ``codec`` using many buffer types.

    Encoding is exercised with the array itself, ``bytes``, ``bytearray`` and
    ``array.array``; decoding with ``bytes``, ``bytearray``, ``array.array``
    and a ``uint8`` ndarray, and finally into caller-provided ``out``
    buffers. Every result is checked against ``arr`` with ``compare_arrays``.
    """
    # N.B., watch out here with blosc compressor, if the itemsize of
    # the source buffer is different then the results of encoding
    # (i.e., compression) may be different. Hence we *do not* require that
    # the results of encoding be identical for all possible inputs, rather
    # we just require that the results of the encode/decode round-trip can
    # be compared to the original array.

    def _encode_inputs():
        # Built lazily so each buffer is created right before it is encoded.
        yield arr  # numpy array
        yield arr.tobytes(order='A')  # bytes
        yield bytearray(arr.tobytes(order='A'))  # bytearray
        yield array.array('b', arr.tobytes(order='A'))  # array.array

    # encoding should support any object exporting the buffer protocol
    enc = None
    for source in _encode_inputs():
        enc = codec.encode(source)
        compare_arrays(arr, codec.decode(enc), precision=precision)

    # decoding should support any object exporting the buffer protocol;
    # the payload used is the encoding of the last input above
    enc_bytes = ensure_bytes(enc)
    decode_inputs = (
        enc_bytes,  # raw bytes
        bytearray(enc_bytes),  # bytearray
        array.array('b', enc_bytes),  # array.array
        np.frombuffer(enc_bytes, dtype='u1'),  # numpy array
    )
    for payload in decode_inputs:
        compare_arrays(arr, codec.decode(payload), precision=precision)

    # decoding directly into a numpy array, then into a bytearray
    for target in (np.empty_like(arr), bytearray(arr.nbytes)):
        codec.decode(enc_bytes, out=target)
        # noinspection PyTypeChecker
        compare_arrays(arr, target, precision=precision)
116
+
117
+
118
+ def assert_array_items_equal(res, arr):
119
+ assert isinstance(res, np.ndarray)
120
+ res = res.reshape(-1, order='A')
121
+ arr = arr.reshape(-1, order='A')
122
+ assert res.shape == arr.shape
123
+ assert res.dtype == arr.dtype
124
+
125
+ # numpy asserts don't compare object arrays
126
+ # properly; assert that we have the same nans
127
+ # and values
128
+ arr = arr.ravel().tolist()
129
+ res = res.ravel().tolist()
130
+ for a, r in zip(arr, res, strict=True):
131
+ if isinstance(a, np.ndarray):
132
+ assert_array_equal(a, r)
133
+ elif a != a:
134
+ assert r != r
135
+ else:
136
+ assert a == r
137
+
138
+
139
def check_encode_decode_array(arr, codec):
    """Round-trip ``arr`` through ``codec`` and compare item by item.

    Covers a plain decode, a decode into a preallocated ``out`` array, and a
    decode of the encoded data after conversion with ``ensure_ndarray``.
    """
    # NOTE(review): assert_array_items_equal is declared as (res, arr) but is
    # called here with the original array first -- confirm this is intended.
    encoded = codec.encode(arr)
    assert_array_items_equal(arr, codec.decode(encoded))

    target = np.empty_like(arr)
    codec.decode(encoded, out=target)
    assert_array_items_equal(arr, target)

    re_encoded = codec.encode(arr)
    assert_array_items_equal(arr, codec.decode(ensure_ndarray(re_encoded)))
151
+
152
+
153
def check_encode_decode_array_to_bytes(arr, codec):
    """Round-trip ``arr`` through ``codec`` and compare item by item.

    Covers a plain decode and a decode into a preallocated ``out`` array.
    """
    # NOTE(review): assert_array_items_equal is declared as (res, arr) but is
    # called here with the original array first -- confirm this is intended.
    encoded = codec.encode(arr)
    assert_array_items_equal(arr, codec.decode(encoded))

    target = np.empty_like(arr)
    codec.decode(encoded, out=target)
    assert_array_items_equal(arr, target)
161
+
162
+
163
def check_config(codec):
    """Assert that ``codec`` can be rebuilt from its JSON-serialised config."""
    # round-trip through JSON to check serialization
    serialised = _json.dumps(codec.get_config())
    restored_config = _json.loads(serialised)
    assert codec == get_codec(restored_config)
168
+
169
+
170
def check_repr(stmt):
    """Assert that evaluating ``stmt`` yields an object whose repr is ``stmt``."""
    # check repr matches instantiation statement
    # NOTE: ``eval`` executes ``stmt`` as code; only pass trusted, test-authored strings.
    instance = eval(stmt)
    assert stmt == repr(instance)
175
+
176
+
177
def check_backwards_compatibility(codec_id, arrays, codecs, precision=None, prefix=None):
    """Check that codecs still decode previously stored fixture data.

    Fixtures live under ``fixture/<codec_id>[/<prefix>]`` relative to the
    current working directory. Missing fixture files (arrays, codec configs,
    encoded data) are created on first use; existing ones are loaded and
    compared against the current codec behaviour.

    Parameters
    ----------
    codec_id : str
        Name of the fixture sub-directory.
    arrays : sequence of ndarray
        Arrays saved as ``array.NN.npy`` when not already present.
    codecs : sequence
        Codecs to check. A ``None`` entry triggers ``pytest.skip``, which
        ends the calling test at that point.
    precision : sequence, optional
        Per-codec number of decimals for approximate comparison; a ``None``
        entry (or ``precision=None``) requests an exact comparison.
    prefix : str, optional
        Extra sub-directory below ``fixture/<codec_id>``.
    """
    # setup directory to hold data fixture
    if prefix:
        fixture_dir = os.path.join('fixture', codec_id, prefix)
    else:
        fixture_dir = os.path.join('fixture', codec_id)
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(fixture_dir, exist_ok=True)

    # save fixture data
    for i, arr in enumerate(arrays):
        arr_fn = os.path.join(fixture_dir, f'array.{i:02d}.npy')
        if not os.path.exists(arr_fn):  # pragma: no cover
            np.save(arr_fn, arr)

    # load fixture data
    for arr_fn in glob(os.path.join(fixture_dir, 'array.*.npy')):
        # setup: the array index is the middle component of 'array.NN.npy'
        i = int(arr_fn.split('.')[-2])
        arr = np.load(arr_fn, allow_pickle=True)
        arr_bytes = arr.tobytes(order='A')
        order = 'F' if arr.flags.f_contiguous else 'C'

        for j, codec in enumerate(codecs):
            if codec is None:
                pytest.skip("codec has been removed")

            # setup a directory to hold encoded data
            codec_dir = os.path.join(fixture_dir, f'codec.{j:02d}')
            os.makedirs(codec_dir, exist_ok=True)

            # file with codec configuration information
            codec_fn = os.path.join(codec_dir, 'config.json')
            # one time save config
            if not os.path.exists(codec_fn):  # pragma: no cover
                with open(codec_fn, mode='w') as cf:
                    _json.dump(codec.get_config(), cf, sort_keys=True, indent=4)
            # load config and compare with expectation
            with open(codec_fn) as cf:
                config = _json.load(cf)
            assert codec == get_codec(config)

            enc_fn = os.path.join(codec_dir, f'encoded.{i:02d}.dat')

            # one time encode and save array
            if not os.path.exists(enc_fn):  # pragma: no cover
                enc = ensure_bytes(codec.encode(arr))
                with open(enc_fn, mode='wb') as ef:
                    ef.write(enc)

            # load and decode data
            with open(enc_fn, mode='rb') as ef:
                enc = ef.read()
            dec = codec.decode(enc)
            dec_arr = ensure_ndarray(dec).reshape(-1, order='A')
            dec_arr = dec_arr.view(dtype=arr.dtype).reshape(arr.shape, order=order)
            if precision and precision[j] is not None:
                assert_array_almost_equal(arr, dec_arr, decimal=precision[j])
            elif arr.dtype == 'object':
                assert_array_items_equal(arr, dec_arr)
            else:
                assert_array_equal(arr, dec_arr)
                assert arr_bytes == ensure_bytes(dec)
245
+
246
+
247
def check_err_decode_object_buffer(compressor):
    """Assert that decoding into an object-dtype ``out`` raises TypeError."""
    # cannot decode directly into object array, leads to segfaults
    encoded = compressor.encode(np.arange(10))
    target = np.empty(10, dtype=object)
    with pytest.raises(TypeError):
        compressor.decode(encoded, out=target)
254
+
255
+
256
def check_err_encode_object_buffer(compressor):
    """Assert that encoding an object-dtype array raises TypeError."""
    # compressors cannot encode object array
    object_array = np.array(['foo', 'bar', 'baz'], dtype=object)
    with pytest.raises(TypeError):
        compressor.encode(object_array)
261
+
262
+
263
def check_max_buffer_size(codec):
    """Check that ``codec`` enforces its ``max_buffer_size`` attribute.

    For each tested limit, buffers of up to ``limit`` bytes must encode
    without error, while larger buffers must raise ValueError from both
    ``encode`` and ``decode``. The original limit is restored afterwards.
    """
    for limit in (4, 64, 1024):
        saved_limit = codec.max_buffer_size
        try:
            codec.max_buffer_size = limit
            # Just up the max_buffer_size is fine.
            for size in (limit - 1, limit):
                codec.encode(np.zeros(size, dtype=np.int8))

            oversized = [
                bytes(b"x" * (limit + 1)),
                np.zeros(limit + 1, dtype=np.int8),
                np.zeros(limit + 2, dtype=np.int8),
                np.zeros(limit, dtype=np.int16),
                np.zeros(limit, dtype=np.int32),
            ]
            for buf in oversized:
                with pytest.raises(ValueError):
                    codec.encode(buf)
                with pytest.raises(ValueError):
                    codec.decode(buf)
        finally:
            codec.max_buffer_size = saved_limit
@@ -0,0 +1,11 @@
1
+ from numcodecs.abc import Codec
2
+
3
+
4
class TestCodec(Codec):
    """Placeholder codec with id ``"test"``; both methods do nothing."""

    codec_id = "test"

    def encode(self, buf):  # pragma: no cover
        return None

    def decode(self, buf, out=None):  # pragma: no cover
        return None
@@ -0,0 +1,2 @@
1
+ [numcodecs.codecs]
2
+ test = package_with_entrypoint:TestCodec
@@ -0,0 +1,74 @@
1
+ import numpy as np
2
+ from numpy.testing import assert_array_equal
3
+
4
+ from numcodecs.astype import AsType
5
+ from numcodecs.tests.common import (
6
+ check_backwards_compatibility,
7
+ check_config,
8
+ check_encode_decode,
9
+ check_repr,
10
+ )
11
+
12
+ # mix of dtypes: integer, float
13
+ # mix of shapes: 1D, 2D, 3D
14
+ # mix of orders: C, F
15
+ arrays = [
16
+ np.arange(1000, dtype='i4'),
17
+ np.linspace(1000, 1001, 1000, dtype='f8').reshape(100, 10),
18
+ np.random.normal(loc=1000, scale=1, size=(10, 10, 10)),
19
+ np.random.randint(0, 200, size=1000, dtype='u2').reshape(100, 10, order='F'),
20
+ ]
21
+
22
+
23
def test_encode_decode():
    """Round-trip every sample array through an AsType codec of its own dtype."""
    for sample in arrays:
        identity_codec = AsType(encode_dtype=sample.dtype, decode_dtype=sample.dtype)
        check_encode_decode(sample, identity_codec)
27
+
28
+
29
def test_decode():
    """Decoding must cast from the encode dtype to the decode dtype."""
    stored_dtype, wanted_dtype = '<i4', '<i8'
    codec = AsType(encode_dtype=stored_dtype, decode_dtype=wanted_dtype)
    stored = np.arange(10, 20, 1, dtype=stored_dtype)
    expected = stored.astype(wanted_dtype)
    decoded = codec.decode(stored)
    assert_array_equal(expected, decoded)
    assert np.dtype(wanted_dtype) == decoded.dtype
37
+
38
+
39
def test_encode():
    """Encoding must cast from the decode dtype to the encode dtype."""
    stored_dtype, wanted_dtype = '<i4', '<i8'
    codec = AsType(encode_dtype=stored_dtype, decode_dtype=wanted_dtype)
    original = np.arange(10, 20, 1, dtype=wanted_dtype)
    expected = original.astype(stored_dtype)
    encoded = codec.encode(original)
    assert_array_equal(expected, encoded)
    assert np.dtype(stored_dtype) == encoded.dtype
47
+
48
+
49
def test_config():
    """An AsType codec must survive a config round-trip."""
    codec = AsType(encode_dtype='<i4', decode_dtype='<i8')
    check_config(codec)
53
+
54
+
55
def test_repr():
    """The repr of an AsType codec must equal its constructor expression."""
    constructor_expr = "AsType(encode_dtype='<i4', decode_dtype='<i2')"
    check_repr(constructor_expr)
57
+
58
+
59
def test_backwards_compatibility():
    """Check stored AsType fixtures for an integer and a float codec."""
    # integers
    int_arrays = [
        np.arange(1000, dtype='<i4'),
        np.random.randint(0, 200, size=1000, dtype='i4').astype('<i4').reshape(100, 10, order='F'),
    ]
    int_codec = AsType(encode_dtype='<i2', decode_dtype='<i4')
    check_backwards_compatibility(AsType.codec_id, int_arrays, [int_codec], prefix='i')

    # floats (compared approximately, to 3 decimals)
    float_arrays = [
        np.linspace(1000, 1001, 1000, dtype='<f8').reshape(100, 10, order='F'),
        np.random.normal(loc=1000, scale=1, size=(10, 10, 10)).astype('<f8'),
    ]
    float_codec = AsType(encode_dtype='<f4', decode_dtype='<f8')
    check_backwards_compatibility(
        AsType.codec_id, float_arrays, [float_codec], precision=[3], prefix='f'
    )
@@ -0,0 +1,81 @@
1
+ import itertools
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from numcodecs.base64 import Base64
7
+ from numcodecs.tests.common import (
8
+ check_backwards_compatibility,
9
+ check_encode_decode,
10
+ check_err_decode_object_buffer,
11
+ check_err_encode_object_buffer,
12
+ check_repr,
13
+ )
14
+
15
+ codecs = [
16
+ Base64(),
17
+ ]
18
+
19
+
20
+ # mix of dtypes: integer, float, bool, string
21
+ # mix of shapes: 1D, 2D, 3D
22
+ # mix of orders: C, F
23
+ arrays = [
24
+ np.arange(1000, dtype="i4"),
25
+ np.linspace(1000, 1001, 1000, dtype="f8"),
26
+ np.random.normal(loc=1000, scale=1, size=(100, 10)),
27
+ np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order="F"),
28
+ np.random.choice([b"a", b"bb", b"ccc"], size=1000).reshape(10, 10, 10),
29
+ np.random.randint(0, 2**60, size=1000, dtype="u8").view("M8[ns]"),
30
+ np.random.randint(0, 2**60, size=1000, dtype="u8").view("m8[ns]"),
31
+ np.random.randint(0, 2**25, size=1000, dtype="u8").view("M8[m]"),
32
+ np.random.randint(0, 2**25, size=1000, dtype="u8").view("m8[m]"),
33
+ np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("M8[ns]"),
34
+ np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("m8[ns]"),
35
+ np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("M8[m]"),
36
+ np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("m8[m]"),
37
+ ]
38
+
39
+
40
def test_encode_decode():
    """Round-trip every sample array through every configured codec."""
    for sample, codec in itertools.product(arrays, codecs):
        check_encode_decode(sample, codec)
43
+
44
+
45
def test_repr():
    """The repr of a Base64 codec must equal its constructor expression."""
    constructor_expr = "Base64()"
    check_repr(constructor_expr)
47
+
48
+
49
def test_eq():
    """Base64 codecs compare equal to each other and unequal to a string."""
    first, second = Base64(), Base64()
    assert first == second
    assert not first != second
    assert Base64() != "foo"
    assert "foo" != Base64()
    assert not Base64() == "foo"
55
+
56
+
57
def test_backwards_compatibility():
    """Check stored Base64 fixtures against the configured codecs."""
    fixture_name = Base64.codec_id
    check_backwards_compatibility(fixture_name, arrays, codecs)
59
+
60
+
61
def test_err_decode_object_buffer():
    """Base64 must refuse to decode into an object-dtype output buffer."""
    codec = Base64()
    check_err_decode_object_buffer(codec)
63
+
64
+
65
def test_err_encode_object_buffer():
    """Base64 must refuse to encode an object-dtype array."""
    codec = Base64()
    check_err_encode_object_buffer(codec)
67
+
68
+
69
def test_err_encode_list():
    """Encoding a plain list of strings must raise TypeError."""
    words = ["foo", "bar", "baz"]
    for codec in codecs:
        with pytest.raises(TypeError):
            codec.encode(words)
74
+
75
+
76
def test_err_encode_non_contiguous():
    """Encoding a strided (non-contiguous) array must raise ValueError."""
    # non-contiguous memory: every second element of a contiguous array
    strided = np.arange(1000, dtype="i4")[::2]
    for codec in codecs:
        with pytest.raises(ValueError):
            codec.encode(strided)
@@ -0,0 +1,81 @@
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from numcodecs.bitround import BitRound, max_bits
5
+
6
+ # adapted from https://github.com/milankl/BitInformation.jl/blob/main/test/round_nearest.jl
7
+
8
+
9
+ # TODO: add other dtypes
10
@pytest.fixture(params=["float32", "float64"])
def dtype(request):
    """Fixture yielding each floating-point dtype name under test."""
    dtype_name = request.param
    return dtype_name
13
+
14
+
15
def round(data, keepbits):
    """Return ``data`` after a BitRound encode/decode cycle with ``keepbits``.

    Note that this helper shadows the builtin ``round`` within this module.
    """
    codec = BitRound(keepbits=keepbits)
    scratch = data.copy()  # otherwise overwrites the input
    return codec.decode(codec.encode(scratch))
20
+
21
+
22
def test_round_zero_to_zero(dtype):
    """Zeros must stay zero for every keepbits below ``max_bits[dtype]``."""
    zeros = np.zeros((3, 2), dtype=dtype)
    # Don't understand Milan's original test:
    # How is it possible to have negative keepbits?
    # for k in range(-5, 50):
    for keepbits in range(max_bits[dtype]):
        rounded = round(zeros, keepbits)
        np.testing.assert_equal(zeros, rounded)
30
+
31
+
32
def test_round_one_to_one(dtype):
    """Ones must stay one for every keepbits below ``max_bits[dtype]``."""
    ones = np.ones((3, 2), dtype=dtype)
    for keepbits in range(max_bits[dtype]):
        rounded = round(ones, keepbits)
        np.testing.assert_equal(ones, rounded)
37
+
38
+
39
def test_round_minus_one_to_minus_one(dtype):
    """Minus ones must stay unchanged for every keepbits below ``max_bits[dtype]``."""
    minus_ones = -np.ones((3, 2), dtype=dtype)
    for keepbits in range(max_bits[dtype]):
        rounded = round(minus_ones, keepbits)
        np.testing.assert_equal(minus_ones, rounded)
44
+
45
+
46
def test_no_rounding(dtype):
    """With ``keepbits == max_bits[dtype]`` random data must come back unchanged."""
    samples = np.random.random_sample((300, 200)).astype(dtype)
    rounded = round(samples, max_bits[dtype])
    np.testing.assert_equal(samples, rounded)
51
+
52
+
53
+ APPROX_KEEPBITS = {"float32": 11, "float64": 18}
54
+
55
+
56
def test_approx_equal(dtype):
    """Rounding with ``APPROX_KEEPBITS[dtype]`` must stay within ``rtol`` of the input."""
    samples = np.random.random_sample((300, 200)).astype(dtype)
    rounded = round(samples, APPROX_KEEPBITS[dtype])
    # Mimic julia behavior - https://docs.julialang.org/en/v1/base/math/#Base.isapprox
    # (the tolerance is derived from float32 eps for both tested dtypes)
    float32_eps = np.finfo(np.float32).eps
    rtol = np.sqrt(float32_eps)
    # This gets us much closer but still failing for ~6% of the array
    # It does pass if we add 1 to keepbits (11 instead of 10)
    # Is there an off-by-one issue here?
    np.testing.assert_allclose(samples, rounded, rtol=rtol)
65
+
66
+
67
def test_idempotence(dtype):
    """Rounding an already-rounded array must not change it any further.

    The second call is applied to the output of the first; rounding the
    original input twice would only check that the codec is deterministic.
    """
    a = np.random.random_sample((300, 200)).astype(dtype)
    for k in range(20):
        ar = round(a, k)
        # feed the rounded result back in to exercise idempotence
        ar2 = round(ar, k)
        np.testing.assert_equal(ar, ar2)
73
+
74
+
75
def test_errors():
    """Invalid keepbits values and a non-float input must be rejected."""
    # keepbits larger than the dtype allows
    with pytest.raises(ValueError):
        oversized = BitRound(keepbits=99)
        oversized.encode(np.array([0], dtype="float32"))
    # array built without a float dtype
    with pytest.raises(TypeError):
        codec = BitRound(keepbits=10)
        codec.encode(np.array([0]))
    # negative keepbits
    with pytest.raises(ValueError):
        BitRound(-1)