numcodecs 0.13.1__cp313-cp313-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numcodecs might be problematic. Click here for more details.
- numcodecs/__init__.py +143 -0
- numcodecs/_shuffle.cpython-313-darwin.so +0 -0
- numcodecs/abc.py +126 -0
- numcodecs/astype.py +76 -0
- numcodecs/base64.py +27 -0
- numcodecs/bitround.py +79 -0
- numcodecs/blosc.cpython-313-darwin.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +101 -0
- numcodecs/checksum32.py +94 -0
- numcodecs/compat.py +208 -0
- numcodecs/compat_ext.cpython-313-darwin.so +0 -0
- numcodecs/delta.py +97 -0
- numcodecs/fixedscaleoffset.py +132 -0
- numcodecs/fletcher32.cpython-313-darwin.so +0 -0
- numcodecs/gzip.py +52 -0
- numcodecs/jenkins.cpython-313-darwin.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-313-darwin.so +0 -0
- numcodecs/lzma.py +69 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +85 -0
- numcodecs/pcodec.py +89 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +100 -0
- numcodecs/registry.py +72 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +354 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +277 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +58 -0
- numcodecs/tests/test_compat.py +108 -0
- numcodecs/tests/test_delta.py +60 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +35 -0
- numcodecs/tests/test_fixedscaleoffset.py +69 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +90 -0
- numcodecs/tests/test_msgpacks.py +123 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +80 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +40 -0
- numcodecs/tests/test_shuffle.py +168 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +93 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zfpy.py +98 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +92 -0
- numcodecs/version.py +16 -0
- numcodecs/vlen.cpython-313-darwin.so +0 -0
- numcodecs/zfpy.py +111 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-313-darwin.so +0 -0
- numcodecs-0.13.1.dist-info/LICENSE.txt +21 -0
- numcodecs-0.13.1.dist-info/METADATA +64 -0
- numcodecs-0.13.1.dist-info/RECORD +74 -0
- numcodecs-0.13.1.dist-info/WHEEL +5 -0
- numcodecs-0.13.1.dist-info/top_level.txt +1 -0
numcodecs/msgpacks.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import msgpack
|
|
2
|
+
import numpy as np
|
|
3
|
+
|
|
4
|
+
from .abc import Codec
|
|
5
|
+
from .compat import ensure_contiguous_ndarray
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MsgPack(Codec):
    """Codec to encode data as msgpacked bytes. Useful for encoding an array of Python
    objects.

    .. versionchanged:: 0.6
        The encoding format has been changed to include the array shape in the encoded
        data, which ensures that all object arrays can be correctly encoded and decoded.

    Parameters
    ----------
    use_single_float : bool, optional
        Use single precision float type for float.
    use_bin_type : bool, optional
        Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type
        for unicode.
    raw : bool, optional
        If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str
        by decoding with UTF-8 encoding.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
    >>> codec = numcodecs.MsgPack()
    >>> codec.decode(codec.encode(x))
    array(['foo', 'bar', 'baz'], dtype=object)

    See Also
    --------
    numcodecs.pickles.Pickle, numcodecs.json.JSON, numcodecs.vlen.VLenUTF8

    Notes
    -----
    Requires `msgpack <https://pypi.org/project/msgpack/>`_ to be installed.

    The packed payload is a flat msgpack list: the array's top-level items,
    followed by the dtype string and the shape.

    """

    codec_id = 'msgpack2'

    def __init__(self, use_single_float=False, use_bin_type=True, raw=False):
        self.use_single_float = use_single_float
        self.use_bin_type = use_bin_type
        self.raw = raw

    def encode(self, buf):
        """Pack `buf` (anything `np.asarray` accepts) into msgpack bytes.

        The dtype string and shape are appended after the array items so
        that `decode` can rebuild an array of the same dtype and shape.
        """
        try:
            buf = np.asarray(buf)
        except ValueError:
            # np.asarray raised ValueError; retry with an explicit object dtype
            buf = np.asarray(buf, dtype=object)
        items = buf.tolist()
        if buf.ndim == 0:
            # tolist() on a 0-d array yields a bare scalar, not a list;
            # wrap it so the dtype/shape trailer can be appended
            items = [items]
        items.extend((buf.dtype.str, buf.shape))
        return msgpack.packb(
            items,
            use_bin_type=self.use_bin_type,
            use_single_float=self.use_single_float,
        )

    def decode(self, buf, out=None):
        """Unpack msgpack bytes produced by `encode` into an ndarray.

        When `out` is given the decoded array is copied into it and `out`
        is returned; otherwise the freshly built array is returned.
        """
        buf = ensure_contiguous_ndarray(buf)
        items = msgpack.unpackb(buf, raw=self.raw)
        # the last two entries are the trailer written by encode()
        dec = np.empty(items[-1], dtype=items[-2])
        if dec.ndim == 0:
            # slice assignment is invalid on 0-d arrays; store the single
            # item directly
            dec[()] = items[0]
        else:
            dec[:] = items[:-2]
        if out is not None:
            np.copyto(out, dec)
            return out
        else:
            return dec

    def get_config(self):
        """Return the codec configuration as a plain dict."""
        return dict(
            id=self.codec_id,
            raw=self.raw,
            use_single_float=self.use_single_float,
            use_bin_type=self.use_bin_type,
        )

    def __repr__(self):
        return f'MsgPack(raw={self.raw!r}, use_bin_type={self.use_bin_type!r}, use_single_float={self.use_single_float!r})'
|
numcodecs/ndarray_like.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Any, ClassVar, Optional, Protocol, runtime_checkable
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class _CachedProtocolMeta(type(Protocol)):
    """Metaclass that memoises ``isinstance`` checks for runtime protocols.

    The stock @runtime_checkable machinery is slow, see
    <https://github.com/zarr-developers/numcodecs/issues/379>.

    Results are stored in an unbounded, class-level dict keyed on the pair
    (protocol class, class of the checked object).
    """

    _instancecheck_cache: ClassVar[dict[tuple[type, type], bool]] = {}

    def __instancecheck__(self, instance):
        cache = self._instancecheck_cache
        lookup = (self, instance.__class__)
        cached = cache.get(lookup)
        if cached is not None:
            return cached
        # first time this (protocol, class) pair is seen: defer to the
        # regular protocol check and remember the answer
        outcome = super().__instancecheck__(instance)
        cache[lookup] = outcome
        return outcome
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@runtime_checkable
class DType(Protocol, metaclass=_CachedProtocolMeta):
    # Structural (duck-typed) description of a dtype-like object: anything
    # exposing these three attributes satisfies the protocol.
    itemsize: int
    name: str
    kind: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@runtime_checkable
class FlagsObj(Protocol, metaclass=_CachedProtocolMeta):
    # Structural description of the object held in NDArrayLike.flags:
    # three boolean attributes are required.
    c_contiguous: bool
    f_contiguous: bool
    owndata: bool
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@runtime_checkable
class NDArrayLike(Protocol, metaclass=_CachedProtocolMeta):
    # Structural description of an ndarray-like object. isinstance() checks
    # against this protocol go through _CachedProtocolMeta.
    dtype: DType
    shape: tuple[int, ...]
    strides: tuple[int, ...]
    ndim: int
    size: int
    itemsize: int
    nbytes: int
    flags: FlagsObj

    # The method stubs below only declare the required interface; their
    # bodies are never executed, hence the coverage pragmas.
    def __len__(self) -> int: ...  # pragma: no cover

    def __getitem__(self, key) -> Any: ...  # pragma: no cover

    def __setitem__(self, key, value): ...  # pragma: no cover

    def tobytes(self, order: Optional[str] = ...) -> bytes: ...  # pragma: no cover

    def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": ...  # pragma: no cover

    def view(self, dtype: DType = ...) -> "NDArrayLike": ...  # pragma: no cover
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def is_ndarray_like(obj: object) -> bool:
    """Tell whether `obj` satisfies the `NDArrayLike` protocol."""
    satisfies_protocol = isinstance(obj, NDArrayLike)
    return satisfies_protocol
|
numcodecs/packbits.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class PackBits(Codec):
    """Codec to pack elements of a boolean array into bits in a uint8 array.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> codec = numcodecs.PackBits()
    >>> x = np.array([True, False, False, True], dtype=bool)
    >>> y = codec.encode(x)
    >>> y
    array([  4, 144], dtype=uint8)
    >>> z = codec.decode(y)
    >>> z
    array([ True, False, False,  True])

    Notes
    -----
    The first element of the encoded array stores the number of bits that
    were padded to complete the final byte.

    """

    codec_id = 'packbits'

    def __init__(self):
        pass

    def encode(self, buf):
        """Pack a boolean buffer into a uint8 array with a one-byte header."""
        # normalise and flatten in one go
        flat = ensure_ndarray(buf).view(bool).reshape(-1, order='A')

        # number of filler bits needed to reach a whole byte (0..7)
        n_bits_padded = -flat.size % 8

        # np.packbits pads the trailing byte itself
        packed = np.packbits(flat)

        # header byte followed by the packed payload
        enc = np.empty(packed.size + 1, dtype='u1')
        enc[0] = n_bits_padded
        enc[1:] = packed
        return enc

    def decode(self, buf, out=None):
        """Unpack bytes produced by `encode` back into a boolean array."""
        # normalise and flatten in one go
        enc = ensure_ndarray(buf).view('u1').reshape(-1, order='A')

        # header byte: how many trailing bits are filler
        n_bits_padded = int(enc[0])

        bits = np.unpackbits(enc[1:])
        if n_bits_padded:
            # drop the filler bits at the tail
            bits = bits[:-n_bits_padded]

        # reinterpret the 0/1 bytes as booleans and hand over to the
        # destination logic
        return ndarray_copy(bits.view(bool), out)
|
numcodecs/pcodec.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from typing import Literal, Optional
|
|
2
|
+
|
|
3
|
+
import numcodecs
|
|
4
|
+
import numcodecs.abc
|
|
5
|
+
from numcodecs.compat import ensure_contiguous_ndarray
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
|
|
9
|
+
except ImportError: # pragma: no cover
|
|
10
|
+
standalone = None
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
DEFAULT_MAX_PAGE_N = 262144
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PCodec(numcodecs.abc.Codec):
|
|
17
|
+
"""
|
|
18
|
+
PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
|
|
19
|
+
numerical sequences with high compression ratio and fast speed.
|
|
20
|
+
|
|
21
|
+
See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.
|
|
22
|
+
|
|
23
|
+
PCodec supports only the following numerical dtypes: uint16, uint32, uint64,
|
|
24
|
+
int16, int32, int64, float16, float32, and float64.
|
|
25
|
+
|
|
26
|
+
Parameters
|
|
27
|
+
----------
|
|
28
|
+
level : int
|
|
29
|
+
A compression level from 0-12, where 12 take the longest and compresses
|
|
30
|
+
the most.
|
|
31
|
+
delta_encoding_order : init or None
|
|
32
|
+
Either a delta encoding level from 0-7 or None. If set to None, pcodec
|
|
33
|
+
will try to infer the optimal delta encoding order.
|
|
34
|
+
mode_spec : {'auto', 'classic'}
|
|
35
|
+
Configures whether Pcodec should try to infer the best "mode" or
|
|
36
|
+
structure of the data (e.g. approximate multiples of 0.1) to improve
|
|
37
|
+
compression ratio, or skip this step and just use the numbers as-is
|
|
38
|
+
(Classic mode).
|
|
39
|
+
equal_pages_up_to : int
|
|
40
|
+
Divide the chunk into equal pages of up to this many numbers.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
codec_id = "pcodec"
|
|
44
|
+
|
|
45
|
+
def __init__(
|
|
46
|
+
self,
|
|
47
|
+
level: int = 8,
|
|
48
|
+
delta_encoding_order: Optional[int] = None,
|
|
49
|
+
equal_pages_up_to: int = 262144,
|
|
50
|
+
# TODO one day, add support for the Try* mode specs
|
|
51
|
+
mode_spec: Literal['auto', 'classic'] = 'auto',
|
|
52
|
+
):
|
|
53
|
+
if standalone is None: # pragma: no cover
|
|
54
|
+
raise ImportError("pcodec must be installed to use the PCodec codec.")
|
|
55
|
+
|
|
56
|
+
# note that we use `level` instead of `compression_level` to
|
|
57
|
+
# match other codecs
|
|
58
|
+
self.level = level
|
|
59
|
+
self.delta_encoding_order = delta_encoding_order
|
|
60
|
+
self.equal_pages_up_to = equal_pages_up_to
|
|
61
|
+
self.mode_spec = mode_spec
|
|
62
|
+
|
|
63
|
+
def encode(self, buf):
|
|
64
|
+
buf = ensure_contiguous_ndarray(buf)
|
|
65
|
+
|
|
66
|
+
match self.mode_spec:
|
|
67
|
+
case 'auto':
|
|
68
|
+
mode_spec = ModeSpec.auto()
|
|
69
|
+
case 'classic':
|
|
70
|
+
mode_spec = ModeSpec.classic()
|
|
71
|
+
case _:
|
|
72
|
+
raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
|
|
73
|
+
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
|
|
74
|
+
|
|
75
|
+
config = ChunkConfig(
|
|
76
|
+
compression_level=self.level,
|
|
77
|
+
delta_encoding_order=self.delta_encoding_order,
|
|
78
|
+
mode_spec=mode_spec,
|
|
79
|
+
paging_spec=paging_spec,
|
|
80
|
+
)
|
|
81
|
+
return standalone.simple_compress(buf, config)
|
|
82
|
+
|
|
83
|
+
def decode(self, buf, out=None):
|
|
84
|
+
if out is not None:
|
|
85
|
+
out = ensure_contiguous_ndarray(out)
|
|
86
|
+
standalone.simple_decompress_into(buf, out)
|
|
87
|
+
return out
|
|
88
|
+
else:
|
|
89
|
+
return standalone.simple_decompress(buf)
|
numcodecs/pickles.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import pickle
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from .abc import Codec
|
|
6
|
+
from .compat import ensure_contiguous_ndarray
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Pickle(Codec):
    """Codec to encode data as pickled bytes. Useful for encoding an array of Python string
    objects.

    Parameters
    ----------
    protocol : int, defaults to pickle.HIGHEST_PROTOCOL
        The protocol used to pickle data.

    Examples
    --------
    >>> import numcodecs as codecs
    >>> import numpy as np
    >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
    >>> f = codecs.Pickle()
    >>> f.decode(f.encode(x))
    array(['foo', 'bar', 'baz'], dtype=object)

    See Also
    --------
    numcodecs.msgpacks.MsgPack

    Notes
    -----
    Decoding calls ``pickle.loads``, which can execute arbitrary code. Only
    decode data that comes from a trusted source.

    """

    codec_id = 'pickle'

    def __init__(self, protocol=pickle.HIGHEST_PROTOCOL):
        self.protocol = protocol

    def encode(self, buf):
        """Return `buf` serialised with the configured pickle protocol."""
        return pickle.dumps(buf, protocol=self.protocol)

    def decode(self, buf, out=None):
        """Unpickle `buf`; copy the result into `out` when one is supplied."""
        buf = ensure_contiguous_ndarray(buf)
        # SECURITY: pickle.loads must never be fed untrusted bytes
        dec = pickle.loads(buf)

        if out is None:
            return dec

        np.copyto(out, dec)
        return out

    def get_config(self):
        """Return the codec configuration as a plain dict."""
        return dict(id=self.codec_id, protocol=self.protocol)

    def __repr__(self):
        return f'Pickle(protocol={self.protocol})'
|
numcodecs/quantize.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import math
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from .abc import Codec
|
|
6
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Quantize(Codec):
    """Lossy filter to reduce the precision of floating point data.

    Parameters
    ----------
    digits : int
        Desired precision (number of decimal digits).
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.linspace(0, 1, 10, dtype='f8')
    >>> x
    array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
           0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])
    >>> codec = numcodecs.Quantize(digits=1, dtype='f8')
    >>> codec.encode(x)
    array([0.    , 0.125 , 0.25  , 0.3125, 0.4375, 0.5625, 0.6875,
           0.75  , 0.875 , 1.    ])
    >>> codec = numcodecs.Quantize(digits=2, dtype='f8')
    >>> codec.encode(x)
    array([0.       , 0.109375 , 0.21875  , 0.3359375, 0.4453125,
           0.5546875, 0.6640625, 0.78125  , 0.890625 , 1.       ])
    >>> codec = numcodecs.Quantize(digits=3, dtype='f8')
    >>> codec.encode(x)
    array([0.        , 0.11132812, 0.22265625, 0.33300781, 0.44433594,
           0.55566406, 0.66699219, 0.77734375, 0.88867188, 1.        ])

    See Also
    --------
    numcodecs.fixedscaleoffset.FixedScaleOffset

    """

    codec_id = 'quantize'

    def __init__(self, digits, dtype, astype=None):
        self.digits = digits
        self.dtype = np.dtype(dtype)
        # the encoded dtype falls back to the decoded dtype
        self.astype = self.dtype if astype is None else np.dtype(astype)
        both_float = self.dtype.kind == 'f' and self.astype.kind == 'f'
        if not both_float:
            raise ValueError('only floating point data types are supported')

    def encode(self, buf):
        """Round `buf` to a power-of-two grid derived from `digits`."""
        # normalise input
        arr = ensure_ndarray(buf).view(self.dtype)

        # decimal exponent of the requested precision, rounded away from zero
        precision = 10.0**-self.digits
        exp = math.log10(precision)
        exp = int(math.floor(exp)) if exp < 0 else int(math.ceil(exp))

        # number of binary digits covering that decimal precision
        bits = math.ceil(math.log2(10.0**-exp))
        scale = 2.0**bits
        quantized = np.around(scale * arr) / scale

        # cast to the encoded dtype
        return quantized.astype(self.astype, copy=False)

    def decode(self, buf, out=None):
        """Cast encoded data back to `dtype`; the lost precision is not restored."""
        # filter is lossy, decoding is no-op apart from the dtype cast
        encoded = ensure_ndarray(buf).view(self.astype)
        return ndarray_copy(encoded.astype(self.dtype, copy=False), out)

    def get_config(self):
        """Return the codec configuration, with dtypes given as strings."""
        # override to handle encoding dtypes
        return dict(
            id=self.codec_id,
            digits=self.digits,
            dtype=self.dtype.str,
            astype=self.astype.str,
        )

    def __repr__(self):
        fields = [f'digits={self.digits}', f'dtype={self.dtype.str!r}']
        if self.astype != self.dtype:
            # only mention astype when it differs from dtype
            fields.append(f'astype={self.astype.str!r}')
        return f'{type(self).__name__}(' + ', '.join(fields) + ')'
|
numcodecs/registry.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""The registry module provides some simple convenience functions to enable
|
|
2
|
+
applications to dynamically register and look-up codec classes."""
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
from importlib.metadata import entry_points
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger("numcodecs")
|
|
8
|
+
codec_registry = {}
|
|
9
|
+
entries = {}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run_entrypoints():
|
|
13
|
+
entries.clear()
|
|
14
|
+
eps = entry_points()
|
|
15
|
+
entries.update({e.name: e for e in eps.select(group="numcodecs.codecs")})
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
run_entrypoints()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_codec(config):
    """Obtain a codec for the given configuration.

    Parameters
    ----------
    config : dict-like
        Configuration object.

    Returns
    -------
    codec : Codec

    Raises
    ------
    ValueError
        If no codec class is available for the ``id`` in `config`.

    Examples
    --------

    >>> import numcodecs as codecs
    >>> codec = codecs.get_codec(dict(id='zlib', level=1))
    >>> codec
    Zlib(level=1)

    """
    # work on a copy so the caller's mapping keeps its 'id' key
    config = dict(config)
    codec_id = config.pop('id', None)
    cls = codec_registry.get(codec_id)
    if cls is None and codec_id in entries:
        # not registered yet, but an entry point advertises it: load lazily
        logger.debug("Auto loading codec '%s' from entrypoint", codec_id)
        cls = entries[codec_id].load()
        register_codec(cls, codec_id=codec_id)
    if not cls:
        raise ValueError(f'codec not available: {codec_id!r}')
    return cls.from_config(config)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def register_codec(cls, codec_id=None):
    """Register a codec class.

    Parameters
    ----------
    cls : Codec class
    codec_id : optional
        Identifier to register the class under. When omitted (None), the
        class's own ``codec_id`` attribute is used.

    Notes
    -----
    This function maintains a mapping from codec identifiers to codec
    classes. When a codec class is registered, it will replace any class
    previously registered under the same codec identifier, if present.

    """
    key = cls.codec_id if codec_id is None else codec_id
    logger.debug("Registering codec '%s'", key)
    codec_registry[key] = cls
|
numcodecs/shuffle.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from ._shuffle import _doShuffle, _doUnshuffle
|
|
4
|
+
from .abc import Codec
|
|
5
|
+
from .compat import ensure_contiguous_ndarray
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Shuffle(Codec):
    """Codec providing shuffle

    Parameters
    ----------
    elementsize : int
        Size in bytes of the array elements.  Default = 4

    """

    codec_id = 'shuffle'

    def __init__(self, elementsize=4):
        self.elementsize = elementsize

    def _prepare_arrays(self, buf, out):
        """Normalise `buf` and `out`, allocating `out` when it is None.

        With ``elementsize <= 1`` the input is copied straight into `out`.
        Otherwise a ValueError is raised when the byte length of `buf` is
        not a multiple of `elementsize`.
        """
        buf = ensure_contiguous_ndarray(buf)
        out = (
            np.zeros(buf.nbytes, dtype='uint8')
            if out is None
            else ensure_contiguous_ndarray(out)
        )

        if self.elementsize <= 1:
            out.view(buf.dtype)[: len(buf)] = buf[:]  # no shuffling needed
            return buf, out

        if buf.nbytes % self.elementsize:
            raise ValueError("Shuffle buffer is not an integer multiple of elementsize")

        return buf, out

    def encode(self, buf, out=None):
        """Shuffle `buf` into `out` (allocated when None) and return `out`."""
        buf, out = self._prepare_arrays(buf, out)

        # with elementsize <= 1 the data was already copied verbatim
        if self.elementsize > 1:
            _doShuffle(buf.view("uint8"), out.view("uint8"), self.elementsize)

        return out

    def decode(self, buf, out=None):
        """Unshuffle `buf` into `out` (allocated when None) and return `out`."""
        buf, out = self._prepare_arrays(buf, out)

        # with elementsize <= 1 the data was already copied verbatim
        if self.elementsize > 1:
            _doUnshuffle(buf.view("uint8"), out.view("uint8"), self.elementsize)

        return out

    def __repr__(self):
        return f'{type(self).__name__}(elementsize={self.elementsize})'
|