numcodecs 0.16.4__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numcodecs/__init__.py +146 -0
- numcodecs/_shuffle.cpython-313-darwin.so +0 -0
- numcodecs/abc.py +126 -0
- numcodecs/astype.py +72 -0
- numcodecs/base64.py +26 -0
- numcodecs/bitround.py +80 -0
- numcodecs/blosc.cpython-313-darwin.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +98 -0
- numcodecs/checksum32.py +189 -0
- numcodecs/compat.py +206 -0
- numcodecs/compat_ext.cpython-313-darwin.so +0 -0
- numcodecs/delta.py +94 -0
- numcodecs/errors.py +26 -0
- numcodecs/fixedscaleoffset.py +130 -0
- numcodecs/fletcher32.cpython-313-darwin.so +0 -0
- numcodecs/gzip.py +50 -0
- numcodecs/jenkins.cpython-313-darwin.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-313-darwin.so +0 -0
- numcodecs/lzma.py +71 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +82 -0
- numcodecs/pcodec.py +119 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +98 -0
- numcodecs/registry.py +74 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +275 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +290 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +199 -0
- numcodecs/tests/test_compat.py +111 -0
- numcodecs/tests/test_delta.py +61 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +36 -0
- numcodecs/tests/test_fixedscaleoffset.py +77 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +94 -0
- numcodecs/tests/test_msgpacks.py +126 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +90 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_pyzstd.py +76 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +43 -0
- numcodecs/tests/test_shuffle.py +166 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +93 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zarr3.py +48 -0
- numcodecs/tests/test_zarr3_import.py +13 -0
- numcodecs/tests/test_zfpy.py +104 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +189 -0
- numcodecs/version.py +34 -0
- numcodecs/vlen.cpython-313-darwin.so +0 -0
- numcodecs/zarr3.py +67 -0
- numcodecs/zfpy.py +112 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-313-darwin.so +0 -0
- numcodecs-0.16.4.dist-info/METADATA +67 -0
- numcodecs-0.16.4.dist-info/RECORD +87 -0
- numcodecs-0.16.4.dist-info/WHEEL +6 -0
- numcodecs-0.16.4.dist-info/licenses/LICENSE.txt +21 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSE.txt +31 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/BITSHUFFLE.txt +21 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/FASTLZ.txt +20 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/LZ4.txt +25 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/SNAPPY.txt +28 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/STDINT.txt +29 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB-NG.txt +17 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB.txt +22 -0
- numcodecs-0.16.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# Check Zstd against pyzstd package
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
import pyzstd
|
|
6
|
+
|
|
7
|
+
from numcodecs.zstd import Zstd
|
|
8
|
+
|
|
9
|
+
# Payloads shared by every parametrized test in this module.
# The random payload comes from a seeded generator so the parameter values are
# identical on every collection; unseeded data changes between runs (and
# between parallel test workers), which makes failures hard to reproduce.
test_data = [
    b"Hello World!",
    np.arange(113).tobytes(),
    np.arange(10, 15).tobytes(),
    np.random.default_rng(0).integers(3, 50, size=(53,), dtype=np.uint16).tobytes(),
]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@pytest.mark.parametrize("input", test_data)
def test_pyzstd_simple(input):
    """
    Round-trip a payload between numcodecs' Zstd and pyzstd in both
    directions: Zstd must decode pyzstd's one-shot output, and pyzstd must
    decompress Zstd's encoded output.
    """
    codec = Zstd()

    compressed_by_pyzstd = pyzstd.compress(input)
    assert codec.decode(compressed_by_pyzstd) == input

    compressed_by_codec = codec.encode(input)
    assert pyzstd.decompress(compressed_by_codec) == input
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.mark.parametrize("input", test_data)
def test_pyzstd_simple_multiple_frames_decode(input):
    """
    Decode a buffer made of two concatenated pyzstd frames of known sizes.

    pyzstd is checked first as the reference; Zstd.decode must then also
    return the payload twice. Per the original author's note, a decoder that
    only assesses the size of the first frame fails here: it has to keep
    iterating through all the frames until the end of the input buffer.
    """
    codec = Zstd()
    expected = input * 2
    assert pyzstd.decompress(pyzstd.compress(input) * 2) == expected
    assert codec.decode(pyzstd.compress(input) * 2) == expected
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.parametrize("input", test_data)
def test_pyzstd_simple_multiple_frames_encode(input):
    """
    Check that pyzstd decompresses two concatenated frames produced by
    Zstd.encode into the payload repeated twice.
    """
    codec = Zstd()
    two_frames = codec.encode(input) * 2
    assert pyzstd.decompress(two_frames) == input * 2
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@pytest.mark.parametrize("input", test_data)
def test_pyzstd_streaming(input):
    """
    Test if Zstd can decode a single frame and concatenated frames in streaming
    mode where the decompressed size is not recorded in the frame header.
    """
    stream_compressor = pyzstd.ZstdCompressor()
    frame_decompressor = pyzstd.ZstdDecompressor()
    endless_decompressor = pyzstd.EndlessZstdDecompressor()
    codec = Zstd()

    payload = input
    # Only the bytes returned by flush() are used as the frame; the return
    # value of compress() is discarded.
    stream_compressor.compress(payload)
    frame = stream_compressor.flush()
    assert codec.decode(frame) == payload
    assert frame_decompressor.decompress(codec.encode(payload)) == payload

    # Test multiple streaming frames; the same endless decompressor instance
    # is reused for every repeat count, in this order.
    for copies in (2, 3, 4, 5, 7, 11, 13, 99):
        assert codec.decode(frame * copies) == endless_decompressor.decompress(frame * copies)
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
from numpy.testing import assert_array_almost_equal, assert_array_equal
|
|
6
|
+
|
|
7
|
+
from numcodecs.quantize import Quantize
|
|
8
|
+
from numcodecs.tests.common import (
|
|
9
|
+
check_backwards_compatibility,
|
|
10
|
+
check_config,
|
|
11
|
+
check_encode_decode,
|
|
12
|
+
check_repr,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Float64 inputs shared by the tests below: a 1D ramp, 1D random data, and
# the ramp reshaped to 2D (C and Fortran order) and to 3D.
arrays = [
    np.linspace(100, 200, 1000, dtype='<f8'),
    np.random.normal(loc=0, scale=1, size=1000).astype('<f8'),
    *(
        np.linspace(100, 200, 1000, dtype='<f8').reshape(shape, order=order)
        for shape, order in (
            ((100, 10), 'C'),
            ((100, 10), 'F'),
            ((10, 10, 10), 'C'),
        )
    ),
]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# One Quantize codec per (digits, storage dtype) pairing; all of them take
# '<f8' input.
codecs = [
    Quantize(digits=digits, dtype='<f8', astype=astype)
    for digits, astype in (
        (-1, '<f2'),
        (0, '<f2'),
        (1, '<f2'),
        (5, '<f4'),
        (12, '<f8'),
    )
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_encode_decode():
    """
    Run the shared encode/decode check on every array/codec pairing, using
    the codec's own ``digits`` as the comparison precision.
    """
    for data, quantizer in itertools.product(arrays, codecs):
        check_encode_decode(data, quantizer, precision=quantizer.digits)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_encode():
    """
    Check that the encoded values, reshaped back to the input's shape and
    memory order, equal the input to ``codec.digits`` decimals.
    """
    for data, quantizer in itertools.product(arrays, codecs):
        # Restore the input's layout before comparing element-wise.
        layout = 'F' if data.flags.f_contiguous else 'C'
        encoded = quantizer.encode(data).reshape(data.shape, order=layout)
        assert_array_almost_equal(data, encoded, decimal=quantizer.digits)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_decode():
    """
    Check that decoding returns exactly what encoding produced
    (decode is a no-op).
    """
    for data, quantizer in itertools.product(arrays, codecs):
        encoded = quantizer.encode(data)
        decoded = quantizer.decode(encoded)
        assert_array_equal(encoded, decoded)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_config():
    """Run the shared ``check_config`` helper on every codec under test."""
    for quantizer in codecs:
        check_config(quantizer)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_repr():
    """Run the shared ``check_repr`` helper on a sample Quantize repr string."""
    sample = "Quantize(digits=2, dtype='<f8', astype='<f2')"
    check_repr(sample)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_errors():
    """
    Check that Quantize raises ValueError for an integer dtype, an object
    dtype, and an object ``astype``.
    """
    rejected_kwargs = (
        {'dtype': 'i4'},
        {'dtype': object},
        {'dtype': 'f8', 'astype': object},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(ValueError):
            Quantize(digits=2, **kwargs)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def test_backwards_compatibility():
    """
    Run the shared backwards-compatibility check, passing each codec's
    ``digits`` as its comparison precision.
    """
    digits_per_codec = [quantizer.digits for quantizer in codecs]
    check_backwards_compatibility(
        Quantize.codec_id, arrays, codecs, precision=digits_per_codec
    )
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
import numcodecs
|
|
6
|
+
from numcodecs.errors import UnknownCodecError
|
|
7
|
+
from numcodecs.registry import get_codec
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_registry_errors():
    """
    Check that an unknown codec id raises UnknownCodecError with the id
    in its message.
    """
    unknown_config = {'id': 'foo'}
    with pytest.raises(UnknownCodecError, match='foo'):
        get_codec(unknown_config)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_get_codec_argument():
    """Check that get_codec doesn't modify its argument."""
    config = {"id": "json2"}
    snapshot = config.copy()
    get_codec(config)
    assert snapshot == config
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_all_classes_registered():
    """
    Find all Codec subclasses in the numcodecs submodules and check that
    they have been registered.

    see #346 for more info
    """
    unregistered = set()
    for _, submodule in inspect.getmembers(numcodecs, inspect.ismodule):
        for _, member in inspect.getmembers(submodule):
            if not inspect.isclass(member):
                continue
            if not issubclass(member, numcodecs.abc.Codec):
                continue
            codec_id = member.codec_id
            # Classes with no codec id (`None`) are not expected in the registry.
            if codec_id is None or codec_id in numcodecs.registry.codec_registry:
                continue
            unregistered.add(codec_id)

    if unregistered:
        raise Exception(f"these codecs are missing: {unregistered}")  # pragma: no cover
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from multiprocessing import Pool
|
|
2
|
+
from multiprocessing.pool import ThreadPool
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from numcodecs.shuffle import Shuffle
|
|
9
|
+
except ImportError: # pragma: no cover
|
|
10
|
+
pytest.skip("numcodecs.shuffle not available", allow_module_level=True)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
from numcodecs.tests.common import (
|
|
14
|
+
check_backwards_compatibility,
|
|
15
|
+
check_config,
|
|
16
|
+
check_encode_decode,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Shuffle with its default element size, then with explicit sizes 0, 4 and 8.
codecs = [
    Shuffle(),
    *(Shuffle(elementsize=size) for size in (0, 4, 8)),
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Test inputs cover:
#   dtypes: integer, float, bool, string, datetime64/timedelta64
#   shapes: 1D, 2D, 3D
#   orders: C, F
arrays = [
    np.arange(1000, dtype='i4'),
    np.linspace(1000, 1001, 1000, dtype='f8'),
    np.random.normal(loc=1000, scale=1, size=(100, 10)),
    np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'),
    np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10),
    # unsigned 64-bit values viewed as datetimes / timedeltas
    *(
        np.random.randint(0, upper, size=1000, dtype='u8').view(unit)
        for upper, unit in (
            (2**60, 'M8[ns]'),
            (2**60, 'm8[ns]'),
            (2**25, 'M8[m]'),
            (2**25, 'm8[m]'),
        )
    ),
    # signed values at the very bottom of the int64 range, same four views
    *(
        np.random.randint(-(2**63), -(2**63) + 20, size=1000, dtype='i8').view(unit)
        for unit in ('M8[ns]', 'm8[ns]', 'M8[m]', 'm8[m]')
    ),
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.parametrize('array', arrays)
@pytest.mark.parametrize('codec', codecs)
def test_encode_decode(array, codec):
    """Run the shared encode/decode check for one array/codec pairing."""
    check_encode_decode(array, codec)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_config():
    """
    Run the shared ``check_config`` helper on a default Shuffle and on one
    with ``elementsize=8``.
    """
    for kwargs in ({}, {'elementsize': 8}):
        check_config(Shuffle(**kwargs))
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_repr():
    """Check the repr of Shuffle for element sizes 0, 4, 8 and 16."""
    for size in (0, 4, 8, 16):
        expect = f"Shuffle(elementsize={size})"
        actual = repr(Shuffle(elementsize=size))
        assert expect == actual
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_eq():
    """
    Check that two default Shuffle codecs compare equal and that one with
    ``elementsize=16`` differs from the default.
    """
    default_codec = Shuffle()
    assert default_codec == Shuffle()
    assert Shuffle(elementsize=16) != Shuffle()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _encode_worker(data):
    """Encode ``data`` with a freshly constructed default Shuffle codec."""
    return Shuffle().encode(data)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _decode_worker(enc):
    """Decode ``enc`` with a freshly constructed default Shuffle codec."""
    return Shuffle().decode(enc)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@pytest.mark.parametrize('pool', [Pool, ThreadPool])
def test_multiprocessing(pool):
    """
    Encode and decode through a 5-worker pool (process pool and thread pool)
    and check that every result has the same length as the one computed
    directly in this process.

    ``pool`` is the pool class supplied by the parametrization.
    """
    data = np.arange(1000000)
    enc = _encode_worker(data)

    # Keep the pool instance under its own name instead of rebinding `pool`.
    workers = pool(5)
    try:
        # test encoding
        enc_results = workers.map(_encode_worker, [data] * 5)
        assert all(len(enc) == len(e) for e in enc_results)

        # test decoding
        dec_results = workers.map(_decode_worker, [enc] * 5)
        assert all(data.nbytes == len(d) for d in dec_results)
    finally:
        # Tidy up even when an assertion or a worker fails, so that a failing
        # test does not leave worker processes/threads behind.
        workers.close()
        workers.join()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the Shuffle codec."""
    codec_id = Shuffle.codec_id
    check_backwards_compatibility(codec_id, arrays, codecs)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# def test_err_decode_object_buffer():
|
|
117
|
+
# check_err_decode_object_buffer(Shuffle())
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# def test_err_encode_object_buffer():
|
|
121
|
+
# check_err_encode_object_buffer(Shuffle())
|
|
122
|
+
|
|
123
|
+
# def test_decompression_error_handling():
|
|
124
|
+
# for codec in codecs:
|
|
125
|
+
# with pytest.raises(RuntimeError):
|
|
126
|
+
# codec.decode(bytearray())
|
|
127
|
+
# with pytest.raises(RuntimeError):
|
|
128
|
+
# codec.decode(bytearray(0))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def test_expected_result():
    """
    Check the shuffled bytes of four big-endian 64-bit integers against a
    hand-written expected result.
    """
    # If the input is treated as a 2D byte array with shape
    # (size of element, number of elements), the shuffle is essentially a
    # transpose. Big-endian integers make this apparent: each expected 32-bit
    # word collects the bytes at one position from the four inputs.
    words = np.array(
        [0x0001020304050607, 0x08090A0B0C0D0E0F, 0x1011121314151617, 0x18191A1B1C1D1E1F],
        dtype='>u8',
    )
    transposed = np.array(
        [
            0x00081018,
            0x01091119,
            0x020A121A,
            0x030B131B,
            0x040C141C,
            0x050D151D,
            0x060E161E,
            0x070F171F,
        ],
        dtype='u4',
    )
    shuffler = Shuffle(elementsize=words.data.itemsize)
    shuffled = shuffler.encode(words)
    np.testing.assert_array_equal(np.frombuffer(shuffled.data, '>u4'), transposed)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def test_incompatible_elementsize():
    """
    Check that encoding 1001 bytes with ``elementsize=4`` raises ValueError
    (1001 is not a multiple of 4).
    """
    odd_sized = np.arange(1001, dtype='u1')
    shuffler = Shuffle(elementsize=4)
    with pytest.raises(ValueError):
        shuffler.encode(odd_sized)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from numcodecs.vlen import VLenArray
|
|
8
|
+
except ImportError as e: # pragma: no cover
|
|
9
|
+
raise unittest.SkipTest("vlen-array not available") from e
|
|
10
|
+
from numcodecs.tests.common import (
|
|
11
|
+
assert_array_items_equal,
|
|
12
|
+
check_backwards_compatibility,
|
|
13
|
+
check_config,
|
|
14
|
+
check_encode_decode_array,
|
|
15
|
+
check_repr,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Object arrays of variable-length integer arrays: 900 items as 1D, and the
# same content reshaped to 2D.
arrays = [
    np.array(
        [np.array([1, 2, 3]), np.array([4]), np.array([5, 6])] * 300,
        dtype=object,
    ),
    np.array(
        [np.array([1, 2, 3]), np.array([4]), np.array([5, 6])] * 300,
        dtype=object,
    ).reshape(90, 10),
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# One VLenArray codec per little-endian integer dtype, signed then unsigned.
codecs = [
    VLenArray(spec)
    for spec in ('<i1', '<i2', '<i4', '<i8', '<u1', '<u2', '<u4', '<u8')
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_encode_decode():
    """Run the shared array encode/decode check on every array/codec pairing."""
    for data in arrays:
        for vlen_codec in codecs:
            check_encode_decode_array(data, vlen_codec)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_config():
    """Run the shared ``check_config`` helper on a '<i8' VLenArray codec."""
    check_config(VLenArray('<i8'))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_repr():
    """Run the shared ``check_repr`` helper on a sample VLenArray repr string."""
    sample = "VLenArray(dtype='<i8')"
    check_repr(sample)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the VLenArray codecs."""
    codec_id = VLenArray.codec_id
    check_backwards_compatibility(codec_id, arrays, codecs)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_encode_errors():
    """
    Check that encoding a string, or a list of strings, raises ValueError.
    """
    vlen_codec = VLenArray('<i8')
    for bad_input in ('foo', ['foo', 'bar']):
        with pytest.raises(ValueError):
            vlen_codec.encode(bad_input)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_decode_errors():
    """
    Check the exception type raised by ``decode`` for invalid input buffers
    and for invalid ``out`` arguments.
    """
    vlen_codec = VLenArray('<i8')

    bad_buffers = (
        (TypeError, 1234),
        # these should look like corrupt data
        (ValueError, b'foo'),
        (ValueError, np.arange(2, 3, dtype='i4')),
        (ValueError, np.arange(10, 20, dtype='i4')),
        (TypeError, 'foo'),
    )
    for expected_error, buf in bad_buffers:
        with pytest.raises(expected_error):
            vlen_codec.decode(buf)

    # test out parameter
    encoded = vlen_codec.encode(arrays[0])
    bad_outputs = (
        (TypeError, b'foo'),
        (TypeError, 'foo'),
        (TypeError, 123),
        (ValueError, np.zeros(10, dtype='i4')),
    )
    for expected_error, out in bad_outputs:
        with pytest.raises(expected_error):
            vlen_codec.decode(encoded, out=out)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_encode_none():
    """
    Check that a ``None`` item comes back from an encode/decode round trip
    as an empty array.
    """
    source = np.array([[1, 3], None, [4, 7]], dtype=object)
    vlen_codec = VLenArray(int)
    decoded = vlen_codec.decode(vlen_codec.encode(source))
    expected = np.array([np.array([1, 3]), np.array([]), np.array([4, 7])], dtype=object)
    assert_array_items_equal(expected, decoded)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from numcodecs.vlen import VLenBytes
|
|
8
|
+
except ImportError as e: # pragma: no cover
|
|
9
|
+
raise unittest.SkipTest("vlen-bytes not available") from e
|
|
10
|
+
from numcodecs.tests.common import (
|
|
11
|
+
assert_array_items_equal,
|
|
12
|
+
check_backwards_compatibility,
|
|
13
|
+
check_config,
|
|
14
|
+
check_encode_decode_array,
|
|
15
|
+
check_repr,
|
|
16
|
+
greetings,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# UTF-8 encoded form of the shared `greetings` fixture.
greetings_bytes = [text.encode('utf-8') for text in greetings]


# Object arrays of byte strings: 1D and multi-dimensional, C and Fortran order.
arrays = [
    np.array([b'foo', b'bar', b'baz'] * 300, dtype=object),
    np.array(greetings_bytes * 100, dtype=object),
    np.array([b'foo', b'bar', b'baz'] * 300, dtype=object).reshape(90, 10),
    np.array(greetings_bytes * 1000, dtype=object).reshape(
        len(greetings_bytes), 100, 10, order='F'
    ),
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_encode_decode():
    """
    Run the shared array encode/decode check on every test array, each with
    a fresh VLenBytes codec.
    """
    for data in arrays:
        check_encode_decode_array(data, VLenBytes())
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_config():
    """Run the shared ``check_config`` helper on a VLenBytes codec."""
    check_config(VLenBytes())
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_repr():
    """Run the shared ``check_repr`` helper on the VLenBytes repr string."""
    sample = "VLenBytes()"
    check_repr(sample)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the VLenBytes codec."""
    codec_list = [VLenBytes()]
    check_backwards_compatibility(VLenBytes.codec_id, arrays, codec_list)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_encode_errors():
    """
    Check that encoding an int, a list of ints, or an integer ndarray
    raises TypeError.
    """
    bytes_codec = VLenBytes()
    for bad_input in (1234, [1234, 5678], np.ones(10, dtype='i4')):
        with pytest.raises(TypeError):
            bytes_codec.encode(bad_input)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_decode_errors():
    """
    Check the exception type raised by ``decode`` for invalid input buffers
    and for invalid ``out`` arguments.
    """
    bytes_codec = VLenBytes()

    bad_buffers = (
        (TypeError, 1234),
        # these should look like corrupt data
        (ValueError, b'foo'),
        (ValueError, np.arange(2, 3, dtype='i4')),
        (ValueError, np.arange(10, 20, dtype='i4')),
        (TypeError, 'foo'),
    )
    for expected_error, buf in bad_buffers:
        with pytest.raises(expected_error):
            bytes_codec.decode(buf)

    # test out parameter
    encoded = bytes_codec.encode(arrays[0])
    bad_outputs = (
        (TypeError, b'foo'),
        (TypeError, 'foo'),
        (TypeError, 123),
        (ValueError, np.zeros(10, dtype='i4')),
    )
    for expected_error, out in bad_outputs:
        with pytest.raises(expected_error):
            bytes_codec.decode(encoded, out=out)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_encode_none():
    """
    Check that a ``None`` item comes back from an encode/decode round trip
    as an empty byte string.
    """
    source = np.array([b'foo', None, b'bar'], dtype=object)
    bytes_codec = VLenBytes()
    decoded = bytes_codec.decode(bytes_codec.encode(source))
    expected = np.array([b'foo', b'', b'bar'], dtype=object)
    assert_array_items_equal(expected, decoded)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from numcodecs.vlen import VLenUTF8
|
|
8
|
+
except ImportError as e: # pragma: no cover
|
|
9
|
+
raise unittest.SkipTest("vlen-utf8 not available") from e
|
|
10
|
+
from numcodecs.tests.common import (
|
|
11
|
+
assert_array_items_equal,
|
|
12
|
+
check_backwards_compatibility,
|
|
13
|
+
check_config,
|
|
14
|
+
check_encode_decode_array,
|
|
15
|
+
check_repr,
|
|
16
|
+
greetings,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Object arrays of text strings: 1D and multi-dimensional, C and Fortran order.
arrays = [
    np.array(['foo', 'bar', 'baz'] * 300, dtype=object),
    np.array(greetings * 100, dtype=object),
    np.array(['foo', 'bar', 'baz'] * 300, dtype=object).reshape(90, 10),
    np.array(greetings * 1000, dtype=object).reshape(
        len(greetings), 100, 10, order='F'
    ),
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_encode_decode():
    """
    Run the shared array encode/decode check on every test array, each with
    a fresh VLenUTF8 codec.
    """
    for data in arrays:
        check_encode_decode_array(data, VLenUTF8())
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_config():
    """Run the shared ``check_config`` helper on a VLenUTF8 codec."""
    check_config(VLenUTF8())
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_repr():
    """Run the shared ``check_repr`` helper on the VLenUTF8 repr string."""
    sample = "VLenUTF8()"
    check_repr(sample)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_backwards_compatibility():
    """Run the shared backwards-compatibility check for the VLenUTF8 codec."""
    codec_list = [VLenUTF8()]
    check_backwards_compatibility(VLenUTF8.codec_id, arrays, codec_list)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_encode_errors():
    """
    Check that encoding an int, a list of ints, or an integer ndarray
    raises TypeError.
    """
    utf8_codec = VLenUTF8()
    for bad_input in (1234, [1234, 5678], np.ones(10, dtype='i4')):
        with pytest.raises(TypeError):
            utf8_codec.encode(bad_input)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_decode_errors():
    """
    Check the exception type raised by ``decode`` for invalid input buffers
    and for invalid ``out`` arguments.
    """
    utf8_codec = VLenUTF8()

    bad_buffers = (
        (TypeError, 1234),
        # these should look like corrupt data
        (ValueError, b'foo'),
        (ValueError, np.arange(2, 3, dtype='i4')),
        (ValueError, np.arange(10, 20, dtype='i4')),
        (TypeError, 'foo'),
    )
    for expected_error, buf in bad_buffers:
        with pytest.raises(expected_error):
            utf8_codec.decode(buf)

    # test out parameter
    encoded = utf8_codec.encode(arrays[0])
    bad_outputs = (
        (TypeError, b'foo'),
        (TypeError, 'foo'),
        (TypeError, 123),
        (ValueError, np.zeros(10, dtype='i4')),
    )
    for expected_error, out in bad_outputs:
        with pytest.raises(expected_error):
            utf8_codec.decode(encoded, out=out)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@pytest.mark.parametrize("writable", [True, False])
def test_encode_utf8(writable):
    """
    Round-trip an object array containing ``None`` through VLenUTF8, for
    both a writable and a read-only input; ``None`` must come back as an
    empty string.
    """
    source = np.array(['foo', None, 'bar'], dtype=object)
    if not writable:
        # Make the input read-only for the non-writable case.
        source.setflags(write=False)
    utf8_codec = VLenUTF8()
    decoded = utf8_codec.decode(utf8_codec.encode(source))
    expected = np.array(['foo', '', 'bar'], dtype=object)
    assert_array_items_equal(expected, decoded)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
8
|
+
import zarr
|
|
9
|
+
else:
|
|
10
|
+
zarr = pytest.importorskip("zarr", "3.1.3")
|
|
11
|
+
|
|
12
|
+
import numcodecs.zarr3 as zarr3
|
|
13
|
+
|
|
14
|
+
# Attribute names that `test_export` looks up on both numcodecs.zarr3 and
# zarr.codecs.numcodecs.
codec_names = [
    # all-caps names
    "BZ2", "CRC32", "CRC32C", "LZ4", "LZMA", "ZFPY",
    # mixed-case names
    "Adler32", "AsType", "BitRound", "Blosc", "Delta",
    "FixedScaleOffset", "Fletcher32", "GZip", "JenkinsLookup3",
    "PCodec", "PackBits", "Quantize", "Shuffle", "Zlib", "Zstd",
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@pytest.mark.parametrize('codec_name', codec_names)
def test_export(codec_name: str) -> None:
    """
    Ensure that numcodecs.zarr3 re-exports codecs defined in
    zarr.codecs.numcodecs, and that the lookup on numcodecs.zarr3 emits the
    module's DeprecationWarning.
    """
    # Adjacent literals concatenate to the full message, trailing space included.
    deprecation_message = (
        "The numcodecs.zarr3 module is deprecated and will be removed "
        "in a future release of numcodecs. "
    )
    with pytest.warns(DeprecationWarning, match=deprecation_message):
        assert getattr(zarr3, codec_name) == getattr(zarr.codecs.numcodecs, codec_name)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_zarr3_import():
    """
    When zarr cannot be imported, importing numcodecs.zarr3 must raise an
    ImportError whose message names the minimum supported Zarr version.

    When zarr is importable this test makes no assertion.
    """
    # `match` is a regular expression searched in the error message, not a
    # glob: a trailing `*` would only make the final "r" optional, and an
    # unescaped "." matches any character. Spell the text out literally.
    ERROR_MESSAGE_MATCH = r"Zarr 3\.1\.3 or later"

    try:
        import zarr  # noqa: F401
    except ImportError:  # pragma: no cover
        with pytest.raises(ImportError, match=ERROR_MESSAGE_MATCH):
            import numcodecs.zarr3  # noqa: F401
|