numcodecs-0.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numcodecs/__init__.py +146 -0
- numcodecs/_shuffle.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/abc.py +127 -0
- numcodecs/astype.py +72 -0
- numcodecs/base64.py +26 -0
- numcodecs/bitround.py +80 -0
- numcodecs/blosc.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +98 -0
- numcodecs/checksum32.py +183 -0
- numcodecs/compat.py +206 -0
- numcodecs/compat_ext.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/delta.py +94 -0
- numcodecs/errors.py +26 -0
- numcodecs/fixedscaleoffset.py +130 -0
- numcodecs/fletcher32.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/gzip.py +50 -0
- numcodecs/jenkins.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/lzma.py +72 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +82 -0
- numcodecs/pcodec.py +118 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +98 -0
- numcodecs/registry.py +74 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +285 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +284 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +154 -0
- numcodecs/tests/test_compat.py +111 -0
- numcodecs/tests/test_delta.py +61 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +36 -0
- numcodecs/tests/test_fixedscaleoffset.py +77 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +94 -0
- numcodecs/tests/test_msgpacks.py +126 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +90 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +43 -0
- numcodecs/tests/test_shuffle.py +166 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +97 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zarr3.py +279 -0
- numcodecs/tests/test_zarr3_import.py +13 -0
- numcodecs/tests/test_zfpy.py +104 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +92 -0
- numcodecs/version.py +21 -0
- numcodecs/vlen.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs/zarr3.py +401 -0
- numcodecs/zfpy.py +113 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-313-aarch64-linux-gnu.so +0 -0
- numcodecs-0.16.0.dist-info/METADATA +66 -0
- numcodecs-0.16.0.dist-info/RECORD +79 -0
- numcodecs-0.16.0.dist-info/WHEEL +6 -0
- numcodecs-0.16.0.dist-info/entry_points.txt +22 -0
- numcodecs-0.16.0.dist-info/licenses/LICENSE.txt +21 -0
- numcodecs-0.16.0.dist-info/top_level.txt +1 -0
numcodecs/registry.py
ADDED
@@ -0,0 +1,74 @@
"""The registry module provides some simple convenience functions to enable
applications to dynamically register and look-up codec classes."""

import logging
from importlib.metadata import EntryPoints, entry_points

from numcodecs.abc import Codec
from numcodecs.errors import UnknownCodecError

logger = logging.getLogger("numcodecs")
codec_registry: dict[str, Codec] = {}
entries: dict[str, EntryPoints] = {}


def run_entrypoints():
    entries.clear()
    eps = entry_points()
    entries.update({e.name: e for e in eps.select(group="numcodecs.codecs")})


run_entrypoints()


def get_codec(config):
    """Obtain a codec for the given configuration.

    Parameters
    ----------
    config : dict-like
        Configuration object.

    Returns
    -------
    codec : Codec

    Examples
    --------

    >>> import numcodecs as codecs
    >>> codec = codecs.get_codec(dict(id='zlib', level=1))
    >>> codec
    Zlib(level=1)

    """
    config = dict(config)
    codec_id = config.pop('id', None)
    cls = codec_registry.get(codec_id)
    if cls is None and codec_id in entries:
        logger.debug("Auto loading codec '%s' from entrypoint", codec_id)
        cls = entries[codec_id].load()
        register_codec(cls, codec_id=codec_id)
    if cls:
        return cls.from_config(config)
    raise UnknownCodecError(f"{codec_id!r}")


def register_codec(cls, codec_id=None):
    """Register a codec class.

    Parameters
    ----------
    cls : Codec class

    Notes
    -----
    This function maintains a mapping from codec identifiers to codec
    classes. When a codec class is registered, it will replace any class
    previously registered under the same codec identifier, if present.

    """
    if codec_id is None:
        codec_id = cls.codec_id
    logger.debug("Registering codec '%s'", codec_id)
    codec_registry[codec_id] = cls
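To illustrate how register_codec and get_codec fit together, below is a minimal sketch of registering and looking up a custom codec. The Negate class and its 'negate' identifier are invented for this example and are not part of numcodecs; only Codec, ensure_ndarray, register_codec and get_codec come from the package itself.

import numpy as np

from numcodecs.abc import Codec
from numcodecs.compat import ensure_ndarray
from numcodecs.registry import get_codec, register_codec


class Negate(Codec):
    # Hypothetical toy codec: stores the arithmetic negation of int64 data.
    codec_id = 'negate'

    def encode(self, buf):
        return (-ensure_ndarray(buf).view('i8')).tobytes()

    def decode(self, buf, out=None):
        dec = -ensure_ndarray(buf).view('i8')
        if out is None:
            return dec
        ensure_ndarray(out).view('i8')[:] = dec
        return out


register_codec(Negate)                   # maps 'negate' -> Negate in codec_registry
codec = get_codec({'id': 'negate'})      # look up by id and build from config
data = np.arange(5, dtype='i8')
assert np.array_equal(codec.decode(codec.encode(data)), data)

The same lookup path is used for third-party codecs exposed through the "numcodecs.codecs" entry-point group, which run_entrypoints collects at import time.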
numcodecs/shuffle.py
ADDED
@@ -0,0 +1,61 @@
import numpy as np

from ._shuffle import _doShuffle, _doUnshuffle
from .abc import Codec
from .compat import ensure_contiguous_ndarray


class Shuffle(Codec):
    """Codec providing shuffle

    Parameters
    ----------
    elementsize : int
        Size in bytes of the array elements. Default = 4

    """

    codec_id = 'shuffle'

    def __init__(self, elementsize=4):
        self.elementsize = elementsize

    def _prepare_arrays(self, buf, out):
        buf = ensure_contiguous_ndarray(buf)

        if out is None:
            out = np.zeros(buf.nbytes, dtype='uint8')
        else:
            out = ensure_contiguous_ndarray(out)

        if self.elementsize <= 1:
            out.view(buf.dtype)[: len(buf)] = buf[:]  # no shuffling needed
            return buf, out

        if buf.nbytes % self.elementsize != 0:
            raise ValueError("Shuffle buffer is not an integer multiple of elementsize")

        return buf, out

    def encode(self, buf, out=None):
        buf, out = self._prepare_arrays(buf, out)

        if self.elementsize <= 1:
            return out  # no shuffling needed

        _doShuffle(buf.view("uint8"), out.view("uint8"), self.elementsize)

        return out

    def decode(self, buf, out=None):
        buf, out = self._prepare_arrays(buf, out)

        if self.elementsize <= 1:
            return out  # no shuffling needed

        _doUnshuffle(buf.view("uint8"), out.view("uint8"), self.elementsize)

        return out

    def __repr__(self):
        return f'{type(self).__name__}(elementsize={self.elementsize})'
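For context, a minimal usage sketch of the Shuffle codec defined above; the array contents are arbitrary illustration data.

import numpy as np

from numcodecs.shuffle import Shuffle

# Byte-shuffle 4-byte elements: all first bytes together, then all second
# bytes, and so on; this layout is typically easier to compress afterwards.
codec = Shuffle(elementsize=4)
data = np.arange(100, dtype='i4')

enc = codec.encode(data)   # uint8 array holding the shuffled bytes
dec = codec.decode(enc)    # uint8 array holding the original byte order
assert np.array_equal(dec.view('i4'), data)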
numcodecs/tests/common.py
ADDED
@@ -0,0 +1,285 @@
import array
import json as _json
import os
from glob import glob

import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal

from numcodecs import *  # noqa: F403 # for eval to find names in repr tests
from numcodecs.compat import ensure_bytes, ensure_ndarray
from numcodecs.registry import get_codec

greetings = [
    '¡Hola mundo!',
    'Hej Världen!',
    'Servus Woid!',
    'Hei maailma!',
    'Xin chào thế giới',
    'Njatjeta Botë!',
    'Γεια σου κόσμε!',  # noqa: RUF001
    'こんにちは世界',
    '世界,你好!',  # noqa: RUF001
    'Helló, világ!',
    'Zdravo svete!',
    'เฮลโลเวิลด์',
]


def compare_arrays(arr, res, precision=None):
    # ensure numpy array with matching dtype
    res = ensure_ndarray(res).view(arr.dtype)

    # convert to correct shape
    if arr.flags.f_contiguous:
        order = 'F'
    else:
        order = 'C'
    res = res.reshape(arr.shape, order=order)

    # exact compare
    if precision is None:
        assert_array_equal(arr, res)

    # fuzzy compare
    else:
        assert_array_almost_equal(arr, res, decimal=precision)


def check_encode_decode(arr, codec, precision=None):
    # N.B., watch out here with blosc compressor, if the itemsize of
    # the source buffer is different then the results of encoding
    # (i.e., compression) may be different. Hence we *do not* require that
    # the results of encoding be identical for all possible inputs, rather
    # we just require that the results of the encode/decode round-trip can
    # be compared to the original array.

    # encoding should support any object exporting the buffer protocol

    # test encoding of numpy array
    enc = codec.encode(arr)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # test encoding of bytes
    buf = arr.tobytes(order='A')
    enc = codec.encode(buf)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # test encoding of bytearray
    buf = bytearray(arr.tobytes(order='A'))
    enc = codec.encode(buf)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # test encoding of array.array
    buf = array.array('b', arr.tobytes(order='A'))
    enc = codec.encode(buf)
    dec = codec.decode(enc)
    compare_arrays(arr, dec, precision=precision)

    # decoding should support any object exporting the buffer protocol,

    # setup
    enc_bytes = ensure_bytes(enc)

    # test decoding of raw bytes
    dec = codec.decode(enc_bytes)
    compare_arrays(arr, dec, precision=precision)

    # test decoding of bytearray
    dec = codec.decode(bytearray(enc_bytes))
    compare_arrays(arr, dec, precision=precision)

    # test decoding of array.array
    buf = array.array('b', enc_bytes)
    dec = codec.decode(buf)
    compare_arrays(arr, dec, precision=precision)

    # test decoding of numpy array
    buf = np.frombuffer(enc_bytes, dtype='u1')
    dec = codec.decode(buf)
    compare_arrays(arr, dec, precision=precision)

    # test decoding directly into numpy array
    out = np.empty_like(arr)
    codec.decode(enc_bytes, out=out)
    compare_arrays(arr, out, precision=precision)

    # test decoding directly into bytearray
    out = bytearray(arr.nbytes)
    codec.decode(enc_bytes, out=out)
    # noinspection PyTypeChecker
    compare_arrays(arr, out, precision=precision)


def assert_array_items_equal(res, arr):
    assert isinstance(res, np.ndarray)
    res = res.reshape(-1, order='A')
    arr = arr.reshape(-1, order='A')
    assert res.shape == arr.shape
    assert res.dtype == arr.dtype

    # numpy asserts don't compare object arrays
    # properly; assert that we have the same nans
    # and values
    arr = arr.ravel().tolist()
    res = res.ravel().tolist()
    for a, r in zip(arr, res, strict=True):
        if isinstance(a, np.ndarray):
            assert_array_equal(a, r)
        elif a != a:
            assert r != r
        else:
            assert a == r


def check_encode_decode_array(arr, codec):
    enc = codec.encode(arr)
    dec = codec.decode(enc)
    assert_array_items_equal(arr, dec)

    out = np.empty_like(arr)
    codec.decode(enc, out=out)
    assert_array_items_equal(arr, out)

    enc = codec.encode(arr)
    dec = codec.decode(ensure_ndarray(enc))
    assert_array_items_equal(arr, dec)


def check_encode_decode_array_to_bytes(arr, codec):
    enc = codec.encode(arr)
    dec = codec.decode(enc)
    assert_array_items_equal(arr, dec)

    out = np.empty_like(arr)
    codec.decode(enc, out=out)
    assert_array_items_equal(arr, out)


def check_config(codec):
    config = codec.get_config()
    # round-trip through JSON to check serialization
    config = _json.loads(_json.dumps(config))
    assert codec == get_codec(config)


def check_repr(stmt):
    # check repr matches instantiation statement
    codec = eval(stmt)
    actual = repr(codec)
    assert stmt == actual


def check_backwards_compatibility(codec_id, arrays, codecs, precision=None, prefix=None):
    # setup directory to hold data fixture
    if prefix:
        fixture_dir = os.path.join('fixture', codec_id, prefix)
    else:
        fixture_dir = os.path.join('fixture', codec_id)
    if not os.path.exists(fixture_dir):  # pragma: no cover
        os.makedirs(fixture_dir)

    # save fixture data
    for i, arr in enumerate(arrays):
        arr_fn = os.path.join(fixture_dir, f'array.{i:02d}.npy')
        if not os.path.exists(arr_fn):  # pragma: no cover
            np.save(arr_fn, arr)

    # load fixture data
    for arr_fn in glob(os.path.join(fixture_dir, 'array.*.npy')):
        # setup
        i = int(arr_fn.split('.')[-2])
        arr = np.load(arr_fn, allow_pickle=True)
        arr_bytes = arr.tobytes(order='A')
        if arr.flags.f_contiguous:
            order = 'F'
        else:
            order = 'C'

        for j, codec in enumerate(codecs):
            if codec is None:
                pytest.skip("codec has been removed")

            # setup a directory to hold encoded data
            codec_dir = os.path.join(fixture_dir, f'codec.{j:02d}')
            if not os.path.exists(codec_dir):  # pragma: no cover
                os.makedirs(codec_dir)

            # file with codec configuration information
            codec_fn = os.path.join(codec_dir, 'config.json')
            # one time save config
            if not os.path.exists(codec_fn):  # pragma: no cover
                with open(codec_fn, mode='w') as cf:
                    _json.dump(codec.get_config(), cf, sort_keys=True, indent=4)
            # load config and compare with expectation
            with open(codec_fn) as cf:
                config = _json.load(cf)
            assert codec == get_codec(config)

            enc_fn = os.path.join(codec_dir, f'encoded.{i:02d}.dat')

            # one time encode and save array
            if not os.path.exists(enc_fn):  # pragma: no cover
                enc = codec.encode(arr)
                enc = ensure_bytes(enc)
                with open(enc_fn, mode='wb') as ef:
                    ef.write(enc)

            # load and decode data
            with open(enc_fn, mode='rb') as ef:
                enc = ef.read()
            dec = codec.decode(enc)
            dec_arr = ensure_ndarray(dec).reshape(-1, order='A')
            dec_arr = dec_arr.view(dtype=arr.dtype).reshape(arr.shape, order=order)
            if precision and precision[j] is not None:
                assert_array_almost_equal(arr, dec_arr, decimal=precision[j])
            elif arr.dtype == 'object':
                assert_array_items_equal(arr, dec_arr)
            else:
                assert_array_equal(arr, dec_arr)
                assert arr_bytes == ensure_bytes(dec)


def check_err_decode_object_buffer(compressor):
    # cannot decode directly into object array, leads to segfaults
    a = np.arange(10)
    enc = compressor.encode(a)
    out = np.empty(10, dtype=object)
    with pytest.raises(TypeError):
        compressor.decode(enc, out=out)


def check_err_encode_object_buffer(compressor):
    # compressors cannot encode object array
    a = np.array(['foo', 'bar', 'baz'], dtype=object)
    with pytest.raises(TypeError):
        compressor.encode(a)


def check_max_buffer_size(codec):
    for max_buffer_size in (4, 64, 1024):
        old_max_buffer_size = codec.max_buffer_size
        try:
            codec.max_buffer_size = max_buffer_size
            # Just up the max_buffer_size is fine.
            codec.encode(np.zeros(max_buffer_size - 1, dtype=np.int8))
            codec.encode(np.zeros(max_buffer_size, dtype=np.int8))

            buffers = [
                bytes(b"x" * (max_buffer_size + 1)),
                np.zeros(max_buffer_size + 1, dtype=np.int8),
                np.zeros(max_buffer_size + 2, dtype=np.int8),
                np.zeros(max_buffer_size, dtype=np.int16),
                np.zeros(max_buffer_size, dtype=np.int32),
            ]
            for buf in buffers:
                with pytest.raises(ValueError):
                    codec.encode(buf)
                with pytest.raises(ValueError):
                    codec.decode(buf)
        finally:
            codec.max_buffer_size = old_max_buffer_size
numcodecs/tests/test_astype.py
ADDED
@@ -0,0 +1,74 @@
import numpy as np
from numpy.testing import assert_array_equal

from numcodecs.astype import AsType
from numcodecs.tests.common import (
    check_backwards_compatibility,
    check_config,
    check_encode_decode,
    check_repr,
)

# mix of dtypes: integer, float
# mix of shapes: 1D, 2D, 3D
# mix of orders: C, F
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8').reshape(100, 10),
    np.random.normal(loc=1000, scale=1, size=(10, 10, 10)),
    np.random.randint(0, 200, size=1000, dtype='u2').reshape(100, 10, order='F'),
]


def test_encode_decode():
    for arr in arrays:
        codec = AsType(encode_dtype=arr.dtype, decode_dtype=arr.dtype)
        check_encode_decode(arr, codec)


def test_decode():
    encode_dtype, decode_dtype = '<i4', '<i8'
    codec = AsType(encode_dtype=encode_dtype, decode_dtype=decode_dtype)
    arr = np.arange(10, 20, 1, dtype=encode_dtype)
    expect = arr.astype(decode_dtype)
    actual = codec.decode(arr)
    assert_array_equal(expect, actual)
    assert np.dtype(decode_dtype) == actual.dtype


def test_encode():
    encode_dtype, decode_dtype = '<i4', '<i8'
    codec = AsType(encode_dtype=encode_dtype, decode_dtype=decode_dtype)
    arr = np.arange(10, 20, 1, dtype=decode_dtype)
    expect = arr.astype(encode_dtype)
    actual = codec.encode(arr)
    assert_array_equal(expect, actual)
    assert np.dtype(encode_dtype) == actual.dtype


def test_config():
    encode_dtype, decode_dtype = '<i4', '<i8'
    codec = AsType(encode_dtype=encode_dtype, decode_dtype=decode_dtype)
    check_config(codec)


def test_repr():
    check_repr("AsType(encode_dtype='<i4', decode_dtype='<i2')")


def test_backwards_compatibility():
    # integers
    arrs = [
        np.arange(1000, dtype='<i4'),
        np.random.randint(0, 200, size=1000, dtype='i4').astype('<i4').reshape(100, 10, order='F'),
    ]
    codec = AsType(encode_dtype='<i2', decode_dtype='<i4')
    check_backwards_compatibility(AsType.codec_id, arrs, [codec], prefix='i')

    # floats
    arrs = [
        np.linspace(1000, 1001, 1000, dtype='<f8').reshape(100, 10, order='F'),
        np.random.normal(loc=1000, scale=1, size=(10, 10, 10)).astype('<f8'),
    ]
    codec = AsType(encode_dtype='<f4', decode_dtype='<f8')
    check_backwards_compatibility(AsType.codec_id, arrs, [codec], precision=[3], prefix='f')
numcodecs/tests/test_base64.py
ADDED
@@ -0,0 +1,81 @@
import itertools

import numpy as np
import pytest

from numcodecs.base64 import Base64
from numcodecs.tests.common import (
    check_backwards_compatibility,
    check_encode_decode,
    check_err_decode_object_buffer,
    check_err_encode_object_buffer,
    check_repr,
)

codecs = [
    Base64(),
]


# mix of dtypes: integer, float, bool, string
# mix of shapes: 1D, 2D, 3D
# mix of orders: C, F
arrays = [
    np.arange(1000, dtype="i4"),
    np.linspace(1000, 1001, 1000, dtype="f8"),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order="F"),
    np.random.choice([b"a", b"bb", b"ccc"], size=1000).reshape(10, 10, 10),
    np.random.randint(0, 2**60, size=1000, dtype="u8").view("M8[ns]"),
    np.random.randint(0, 2**60, size=1000, dtype="u8").view("m8[ns]"),
    np.random.randint(0, 2**25, size=1000, dtype="u8").view("M8[m]"),
    np.random.randint(0, 2**25, size=1000, dtype="u8").view("m8[m]"),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("M8[ns]"),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("m8[ns]"),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("M8[m]"),
    np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype="i8").view("m8[m]"),
]


def test_encode_decode():
    for arr, codec in itertools.product(arrays, codecs):
        check_encode_decode(arr, codec)


def test_repr():
    check_repr("Base64()")


def test_eq():
    assert Base64() == Base64()
    assert not Base64() != Base64()
    assert Base64() != "foo"
    assert "foo" != Base64()
    assert not Base64() == "foo"


def test_backwards_compatibility():
    check_backwards_compatibility(Base64.codec_id, arrays, codecs)


def test_err_decode_object_buffer():
    check_err_decode_object_buffer(Base64())


def test_err_encode_object_buffer():
    check_err_encode_object_buffer(Base64())


def test_err_encode_list():
    data = ["foo", "bar", "baz"]
    for codec in codecs:
        with pytest.raises(TypeError):
            codec.encode(data)


def test_err_encode_non_contiguous():
    # non-contiguous memory
    arr = np.arange(1000, dtype="i4")[::2]
    for codec in codecs:
        with pytest.raises(ValueError):
            codec.encode(arr)
numcodecs/tests/test_bitround.py
ADDED
@@ -0,0 +1,81 @@
import numpy as np
import pytest

from numcodecs.bitround import BitRound, max_bits

# adapted from https://github.com/milankl/BitInformation.jl/blob/main/test/round_nearest.jl


# TODO: add other dtypes
@pytest.fixture(params=["float32", "float64"])
def dtype(request):
    return request.param


def round(data, keepbits):
    codec = BitRound(keepbits=keepbits)
    data = data.copy()  # otherwise overwrites the input
    encoded = codec.encode(data)
    return codec.decode(encoded)


def test_round_zero_to_zero(dtype):
    a = np.zeros((3, 2), dtype=dtype)
    # Don't understand Milan's original test:
    # How is it possible to have negative keepbits?
    # for k in range(-5, 50):
    for k in range(max_bits[dtype]):
        ar = round(a, k)
        np.testing.assert_equal(a, ar)


def test_round_one_to_one(dtype):
    a = np.ones((3, 2), dtype=dtype)
    for k in range(max_bits[dtype]):
        ar = round(a, k)
        np.testing.assert_equal(a, ar)


def test_round_minus_one_to_minus_one(dtype):
    a = -np.ones((3, 2), dtype=dtype)
    for k in range(max_bits[dtype]):
        ar = round(a, k)
        np.testing.assert_equal(a, ar)


def test_no_rounding(dtype):
    a = np.random.random_sample((300, 200)).astype(dtype)
    keepbits = max_bits[dtype]
    ar = round(a, keepbits)
    np.testing.assert_equal(a, ar)


APPROX_KEEPBITS = {"float32": 11, "float64": 18}


def test_approx_equal(dtype):
    a = np.random.random_sample((300, 200)).astype(dtype)
    ar = round(a, APPROX_KEEPBITS[dtype])
    # Mimic julia behavior - https://docs.julialang.org/en/v1/base/math/#Base.isapprox
    rtol = np.sqrt(np.finfo(np.float32).eps)
    # This gets us much closer but still failing for ~6% of the array
    # It does pass if we add 1 to keepbits (11 instead of 10)
    # Is there an off-by-one issue here?
    np.testing.assert_allclose(a, ar, rtol=rtol)


def test_idempotence(dtype):
    a = np.random.random_sample((300, 200)).astype(dtype)
    for k in range(20):
        ar = round(a, k)
        ar2 = round(a, k)
        np.testing.assert_equal(ar, ar2)


def test_errors():
    with pytest.raises(ValueError):
        BitRound(keepbits=99).encode(np.array([0], dtype="float32"))
    with pytest.raises(TypeError):
        BitRound(keepbits=10).encode(np.array([0]))
    with pytest.raises(ValueError):
        BitRound(-1)