numcodecs 0.16.0__cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of numcodecs might be problematic.
- numcodecs/__init__.py +146 -0
- numcodecs/_shuffle.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/abc.py +127 -0
- numcodecs/astype.py +72 -0
- numcodecs/base64.py +26 -0
- numcodecs/bitround.py +80 -0
- numcodecs/blosc.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +98 -0
- numcodecs/checksum32.py +183 -0
- numcodecs/compat.py +206 -0
- numcodecs/compat_ext.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/delta.py +94 -0
- numcodecs/errors.py +26 -0
- numcodecs/fixedscaleoffset.py +130 -0
- numcodecs/fletcher32.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/gzip.py +50 -0
- numcodecs/jenkins.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/lzma.py +72 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +82 -0
- numcodecs/pcodec.py +118 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +98 -0
- numcodecs/registry.py +74 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +285 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +284 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +154 -0
- numcodecs/tests/test_compat.py +111 -0
- numcodecs/tests/test_delta.py +61 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +36 -0
- numcodecs/tests/test_fixedscaleoffset.py +77 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +94 -0
- numcodecs/tests/test_msgpacks.py +126 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +90 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +43 -0
- numcodecs/tests/test_shuffle.py +166 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +97 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zarr3.py +279 -0
- numcodecs/tests/test_zarr3_import.py +13 -0
- numcodecs/tests/test_zfpy.py +104 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +92 -0
- numcodecs/version.py +21 -0
- numcodecs/vlen.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs/zarr3.py +401 -0
- numcodecs/zfpy.py +113 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-312-aarch64-linux-gnu.so +0 -0
- numcodecs-0.16.0.dist-info/METADATA +66 -0
- numcodecs-0.16.0.dist-info/RECORD +79 -0
- numcodecs-0.16.0.dist-info/WHEEL +6 -0
- numcodecs-0.16.0.dist-info/entry_points.txt +22 -0
- numcodecs-0.16.0.dist-info/licenses/LICENSE.txt +21 -0
- numcodecs-0.16.0.dist-info/top_level.txt +1 -0

numcodecs/json.py
ADDED
@@ -0,0 +1,107 @@
+import json as _json
+import textwrap
+
+import numpy as np
+
+from .abc import Codec
+from .compat import ensure_text
+
+
+class JSON(Codec):
+    """Codec to encode data as JSON. Useful for encoding an array of Python objects.
+
+    .. versionchanged:: 0.6
+       The encoding format has been changed to include the array shape in the encoded
+       data, which ensures that all object arrays can be correctly encoded and decoded.
+
+    Examples
+    --------
+    >>> import numcodecs
+    >>> import numpy as np
+    >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
+    >>> codec = numcodecs.JSON()
+    >>> codec.decode(codec.encode(x))
+    array(['foo', 'bar', 'baz'], dtype=object)
+
+    See Also
+    --------
+    numcodecs.pickles.Pickle, numcodecs.msgpacks.MsgPack
+
+    """
+
+    codec_id = 'json2'
+
+    def __init__(
+        self,
+        encoding='utf-8',
+        skipkeys=False,
+        ensure_ascii=True,
+        check_circular=True,
+        allow_nan=True,
+        sort_keys=True,
+        indent=None,
+        separators=None,
+        strict=True,
+    ):
+        self._text_encoding = encoding
+        if separators is None:
+            # ensure separators are explicitly specified, and consistent behaviour across
+            # Python versions, and most compact representation if indent is None
+            if indent is None:
+                separators = ',', ':'
+            else:
+                separators = ', ', ': '
+        separators = tuple(separators)
+        self._encoder_config = {
+            'skipkeys': skipkeys,
+            'ensure_ascii': ensure_ascii,
+            'check_circular': check_circular,
+            'allow_nan': allow_nan,
+            'indent': indent,
+            'separators': separators,
+            'sort_keys': sort_keys,
+        }
+        self._encoder = _json.JSONEncoder(**self._encoder_config)
+        self._decoder_config = {'strict': strict}
+        self._decoder = _json.JSONDecoder(**self._decoder_config)
+
+    def encode(self, buf):
+        try:
+            buf = np.asarray(buf)
+        except ValueError:  # pragma: no cover
+            buf = np.asarray(buf, dtype=object)
+        items = np.atleast_1d(buf).tolist()
+        items.append(buf.dtype.str)
+        items.append(buf.shape)
+        return self._encoder.encode(items).encode(self._text_encoding)
+
+    def decode(self, buf, out=None):
+        items = self._decoder.decode(ensure_text(buf, self._text_encoding))
+        dec = np.empty(items[-1], dtype=items[-2])
+        if not items[-1]:
+            dec[...] = items[0]
+        else:
+            dec[:] = items[:-2]
+        if out is not None:
+            np.copyto(out, dec)
+            return out
+        else:
+            return dec
+
+    def get_config(self):
+        config = {'id': self.codec_id, 'encoding': self._text_encoding}
+        config.update(self._encoder_config)
+        config.update(self._decoder_config)
+        return config
+
+    def __repr__(self):
+        params = [f'encoding={self._text_encoding!r}']
+        for k, v in sorted(self._encoder_config.items()):
+            params.append(f'{k}={v!r}')
+        for k, v in sorted(self._decoder_config.items()):
+            params.append(f'{k}={v!r}')
+        classname = type(self).__name__
+        params = ', '.join(params)
+        return textwrap.fill(
+            f'{classname}({params})', width=80, break_long_words=False, subsequent_indent=' '
+        )
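
For context, a minimal round-trip sketch of the 'json2' format implemented above: encode() serializes the flattened elements followed by the dtype string and the shape, so the payload is self-describing. This assumes numcodecs and numpy are installed; the exact dtype string (e.g. '<i8') depends on the platform.

import numcodecs
import numpy as np

codec = numcodecs.JSON()
x = np.arange(3)

# the payload is a JSON list: elements first, then dtype.str, then the shape,
# e.g. b'[0,1,2,"<i8",[3]]' on a little-endian 64-bit platform
enc = codec.encode(x)

# decode() reads the trailing dtype/shape back off the list and rebuilds the array
y = codec.decode(enc)
assert np.array_equal(x, y)
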
Binary file

numcodecs/lzma.py
ADDED
@@ -0,0 +1,72 @@
+from types import ModuleType
+from typing import Optional
+
+_lzma: Optional[ModuleType] = None
+try:
+    import lzma as _lzma
+except ImportError:  # pragma: no cover
+    try:  # noqa: SIM105
+        from backports import lzma as _lzma  # type: ignore[no-redef]
+    except ImportError:
+        pass
+
+
+if _lzma:
+    from .abc import Codec
+    from .compat import ensure_contiguous_ndarray, ndarray_copy
+
+    # noinspection PyShadowingBuiltins
+    class LZMA(Codec):
+        """Codec providing compression using lzma via the Python standard
+        library.
+
+        Parameters
+        ----------
+        format : integer, optional
+            One of the lzma format codes, e.g., ``lzma.FORMAT_XZ``.
+        check : integer, optional
+            One of the lzma check codes, e.g., ``lzma.CHECK_NONE``.
+        preset : integer, optional
+            An integer between 0 and 9 inclusive, specifying the compression
+            level.
+        filters : list, optional
+            A list of dictionaries specifying compression filters. If
+            filters are provided, 'preset' must be None.
+
+        """
+
+        codec_id = 'lzma'
+
+        def __init__(self, format=1, check=-1, preset=None, filters=None):
+            self.format = format
+            self.check = check
+            self.preset = preset
+            self.filters = filters
+
+        def encode(self, buf):
+            # normalise inputs
+            buf = ensure_contiguous_ndarray(buf)
+
+            # do compression
+            return _lzma.compress(
+                buf,
+                format=self.format,
+                check=self.check,
+                preset=self.preset,
+                filters=self.filters,
+            )
+
+        def decode(self, buf, out=None):
+            # normalise inputs
+            buf = ensure_contiguous_ndarray(buf)
+            if out is not None:
+                out = ensure_contiguous_ndarray(out)
+
+            # do decompression
+            dec = _lzma.decompress(buf, format=self.format, filters=self.filters)
+
+            # handle destination
+            return ndarray_copy(dec, out)
+
+        def __repr__(self):
+            return f'{type(self).__name__}(format={self.format!r}, check={self.check!r}, preset={self.preset!r}, filters={self.filters!r})'
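
Since the LZMA class above is only defined when the lzma module (or its backport) imports successfully, the following round-trip sketch assumes the standard-library lzma is available:

import lzma
import numpy as np
from numcodecs import LZMA

# format=1 (the default) corresponds to lzma.FORMAT_XZ; preset trades speed for ratio
codec = LZMA(format=lzma.FORMAT_XZ, preset=6)

x = np.linspace(0, 1, 1000, dtype='f8')
enc = codec.encode(x)

# decode() returns raw decompressed bytes unless an output array is supplied
out = np.empty_like(x)
codec.decode(enc, out=out)
assert np.array_equal(x, out)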

numcodecs/msgpacks.py
ADDED
@@ -0,0 +1,86 @@
+import msgpack
+import numpy as np
+
+from .abc import Codec
+from .compat import ensure_contiguous_ndarray
+
+
+class MsgPack(Codec):
+    """Codec to encode data as msgpacked bytes. Useful for encoding an array of Python
+    objects.
+
+    .. versionchanged:: 0.6
+       The encoding format has been changed to include the array shape in the encoded
+       data, which ensures that all object arrays can be correctly encoded and decoded.
+
+    Parameters
+    ----------
+    use_single_float : bool, optional
+        Use single precision float type for float.
+    use_bin_type : bool, optional
+        Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type
+        for unicode.
+    raw : bool, optional
+        If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str
+        by decoding with UTF-8 encoding.
+
+    Examples
+    --------
+    >>> import numcodecs
+    >>> import numpy as np
+    >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
+    >>> codec = numcodecs.MsgPack()
+    >>> codec.decode(codec.encode(x))
+    array(['foo', 'bar', 'baz'], dtype=object)
+
+    See Also
+    --------
+    numcodecs.pickles.Pickle, numcodecs.json.JSON, numcodecs.vlen.VLenUTF8
+
+    Notes
+    -----
+    Requires `msgpack <https://pypi.org/project/msgpack/>`_ to be installed.
+
+    """
+
+    codec_id = 'msgpack2'
+
+    def __init__(self, use_single_float=False, use_bin_type=True, raw=False):
+        self.use_single_float = use_single_float
+        self.use_bin_type = use_bin_type
+        self.raw = raw
+
+    def encode(self, buf):
+        try:
+            buf = np.asarray(buf)
+        except ValueError:
+            buf = np.asarray(buf, dtype=object)
+        items = buf.tolist()
+        items.extend((buf.dtype.str, buf.shape))
+        return msgpack.packb(
+            items,
+            use_bin_type=self.use_bin_type,
+            use_single_float=self.use_single_float,
+        )
+
+    def decode(self, buf, out=None):
+        buf = ensure_contiguous_ndarray(buf)
+        items = msgpack.unpackb(buf, raw=self.raw)
+        dec = np.empty(items[-1], dtype=items[-2])
+        dec[:] = items[:-2]
+        if out is not None:
+            np.copyto(out, dec)
+            return out
+        else:
+            return dec
+
+    def get_config(self):
+        return {
+            'id': self.codec_id,
+            'raw': self.raw,
+            'use_single_float': self.use_single_float,
+            'use_bin_type': self.use_bin_type,
+        }
+
+    def __repr__(self):
+        return f'MsgPack(raw={self.raw!r}, use_bin_type={self.use_bin_type!r}, use_single_float={self.use_single_float!r})'
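
get_config() above returns a plain dict keyed by the codec_id 'msgpack2', so an equivalent codec can be rebuilt through the numcodecs registry. A small sketch of that round trip, assuming numcodecs, numpy, and msgpack are installed:

import numcodecs
import numpy as np

codec = numcodecs.MsgPack(raw=False)
config = codec.get_config()
# {'id': 'msgpack2', 'raw': False, 'use_single_float': False, 'use_bin_type': True}

# rebuild an equivalent codec from its config via the registry
codec2 = numcodecs.get_codec(config)

x = np.array([{'a': 1}, None, 'foo'], dtype=object)
assert np.array_equal(codec2.decode(codec.encode(x)), x)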

numcodecs/ndarray_like.py
ADDED
@@ -0,0 +1,65 @@
+from typing import Any, ClassVar, Protocol, runtime_checkable
+
+
+class _CachedProtocolMeta(Protocol.__class__):  # type: ignore[name-defined]
+    """Custom implementation of @runtime_checkable
+
+    The native implementation of @runtime_checkable is slow,
+    see <https://github.com/zarr-developers/numcodecs/issues/379>.
+
+    This metaclass keeps an unbounded cache of the result of
+    isinstance checks using the object's class as the cache key.
+    """
+
+    _instancecheck_cache: ClassVar[dict[tuple[type, type], bool]] = {}
+
+    def __instancecheck__(self, instance):
+        key = (self, instance.__class__)
+        ret = self._instancecheck_cache.get(key)
+        if ret is None:
+            ret = super().__instancecheck__(instance)
+            self._instancecheck_cache[key] = ret
+        return ret
+
+
+@runtime_checkable
+class DType(Protocol, metaclass=_CachedProtocolMeta):
+    itemsize: int
+    name: str
+    kind: str
+
+
+@runtime_checkable
+class FlagsObj(Protocol, metaclass=_CachedProtocolMeta):
+    c_contiguous: bool
+    f_contiguous: bool
+    owndata: bool
+
+
+@runtime_checkable
+class NDArrayLike(Protocol, metaclass=_CachedProtocolMeta):
+    dtype: DType
+    shape: tuple[int, ...]
+    strides: tuple[int, ...]
+    ndim: int
+    size: int
+    itemsize: int
+    nbytes: int
+    flags: FlagsObj
+
+    def __len__(self) -> int: ...  # pragma: no cover
+
+    def __getitem__(self, key) -> Any: ...  # pragma: no cover
+
+    def __setitem__(self, key, value): ...  # pragma: no cover
+
+    def tobytes(self, order: str | None = ...) -> bytes: ...  # pragma: no cover
+
+    def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": ...  # pragma: no cover
+
+    def view(self, dtype: DType = ...) -> "NDArrayLike": ...  # pragma: no cover
+
+
+def is_ndarray_like(obj: object) -> bool:
+    """Return True when `obj` is ndarray-like"""
+    return isinstance(obj, NDArrayLike)
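
Because __instancecheck__ is cached per concrete class, the cost of an isinstance check against these protocols is paid once per type rather than once per object. A short sketch of how is_ndarray_like behaves, assuming numpy is installed:

import numpy as np
from numcodecs.ndarray_like import NDArrayLike, is_ndarray_like

# numpy arrays satisfy the structural protocol (dtype, shape, flags, ...)
assert is_ndarray_like(np.zeros(4))

# plain Python sequences do not expose the required attributes
assert not is_ndarray_like([0, 0, 0, 0])

# repeated checks against the same class are answered from the metaclass cache
assert isinstance(np.zeros(4), NDArrayLike)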

numcodecs/packbits.py
ADDED
@@ -0,0 +1,82 @@
+import numpy as np
+
+from .abc import Codec
+from .compat import ensure_ndarray, ndarray_copy
+
+
+class PackBits(Codec):
+    """Codec to pack elements of a boolean array into bits in a uint8 array.
+
+    Examples
+    --------
+    >>> import numcodecs
+    >>> import numpy as np
+    >>> codec = numcodecs.PackBits()
+    >>> x = np.array([True, False, False, True], dtype=bool)
+    >>> y = codec.encode(x)
+    >>> y
+    array([  4, 144], dtype=uint8)
+    >>> z = codec.decode(y)
+    >>> z
+    array([ True, False, False, True])
+
+    Notes
+    -----
+    The first element of the encoded array stores the number of bits that
+    were padded to complete the final byte.
+
+    """
+
+    codec_id = 'packbits'
+
+    def encode(self, buf):
+        # normalise input
+        arr = ensure_ndarray(buf).view(bool)
+
+        # flatten to simplify implementation
+        arr = arr.reshape(-1, order='A')
+
+        # determine size of packed data
+        n = arr.size
+        n_bytes_packed = n // 8
+        n_bits_leftover = n % 8
+        if n_bits_leftover > 0:
+            n_bytes_packed += 1
+
+        # setup output
+        enc = np.empty(n_bytes_packed + 1, dtype='u1')
+
+        # store how many bits were padded
+        if n_bits_leftover:
+            n_bits_padded = 8 - n_bits_leftover
+        else:
+            n_bits_padded = 0
+        enc[0] = n_bits_padded
+
+        # apply encoding
+        enc[1:] = np.packbits(arr)
+
+        return enc
+
+    def decode(self, buf, out=None):
+        # normalise input
+        enc = ensure_ndarray(buf).view('u1')
+
+        # flatten to simplify implementation
+        enc = enc.reshape(-1, order='A')
+
+        # find out how many bits were padded
+        n_bits_padded = int(enc[0])
+
+        # apply decoding
+        dec = np.unpackbits(enc[1:])
+
+        # remove padded bits
+        if n_bits_padded:
+            dec = dec[:-n_bits_padded]
+
+        # view as boolean array
+        dec = dec.view(bool)
+
+        # handle destination
+        return ndarray_copy(dec, out)
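
The leading byte of the encoded buffer records how many padding bits were added to fill the final byte, which is what lets decode() recover arrays whose length is not a multiple of 8. A small sketch, assuming numcodecs and numpy are installed:

import numcodecs
import numpy as np

codec = numcodecs.PackBits()

x = np.array([True, False, True, True, False], dtype=bool)  # 5 bits -> 3 bits of padding
enc = codec.encode(x)
assert enc[0] == 3          # number of padded bits
assert np.array_equal(codec.decode(enc), x)

# a length that is a multiple of 8 needs no padding
enc8 = codec.encode(np.ones(16, dtype=bool))
assert enc8[0] == 0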

numcodecs/pcodec.py
ADDED
@@ -0,0 +1,118 @@
+from typing import Literal
+
+from numcodecs.abc import Codec
+from numcodecs.compat import ensure_contiguous_ndarray
+from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
+
+DEFAULT_MAX_PAGE_N = 262144
+
+
+class PCodec(Codec):
+    """
+    PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
+    numerical sequences with high compression ratio and fast speed.
+
+    See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.
+
+    PCodec supports only the following numerical dtypes: uint16, uint32, uint64,
+    int16, int32, int64, float16, float32, and float64.
+
+    Parameters
+    ----------
+    level : int
+        A compression level from 0-12, where 12 takes the longest and compresses
+        the most.
+    mode_spec : {"auto", "classic"}
+        Configures whether PCodec should try to infer the best "mode" or
+        structure of the data (e.g. approximate multiples of 0.1) to improve
+        compression ratio, or skip this step and just use the numbers as-is
+        (Classic mode). Note that the "try*" specs are not currently supported.
+    delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
+        Configures the delta encoding strategy. By default, uses "auto", which
+        will try to infer the best encoding order.
+    paging_spec : {"equal_pages_up_to"}
+        Configures the paging strategy. Only "equal_pages_up_to" is currently
+        supported.
+    delta_encoding_order : int or None
+        Explicit delta encoding level from 0-7. Only valid if delta_spec is
+        "try_consecutive" or "auto" (to support backwards compatibility with
+        older versions of this codec).
+    equal_pages_up_to : int
+        Divide the chunk into equal pages of up to this many numbers.
+    """
+
+    codec_id = "pcodec"
+
+    def __init__(
+        self,
+        level: int = 8,
+        *,
+        mode_spec: Literal["auto", "classic"] = "auto",
+        delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
+        paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
+        delta_encoding_order: int | None = None,
+        equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
+    ):
+        # note that we use `level` instead of `compression_level` to
+        # match other codecs
+        self.level = level
+        self.mode_spec = mode_spec
+        self.delta_spec = delta_spec
+        self.paging_spec = paging_spec
+        self.delta_encoding_order = delta_encoding_order
+        self.equal_pages_up_to = equal_pages_up_to
+
+    def _get_chunk_config(self):
+        match self.mode_spec:
+            case "auto":
+                mode_spec = ModeSpec.auto()
+            case "classic":
+                mode_spec = ModeSpec.classic()
+            case _:
+                raise ValueError(f"mode_spec {self.mode_spec} is not supported")
+
+        if self.delta_encoding_order is not None and self.delta_spec == "auto":
+            # backwards compat for before delta_spec was introduced
+            delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
+        elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
+            raise ValueError(
+                "delta_encoding_order can only be set for delta_spec='try_consecutive'"
+            )
+        else:
+            match self.delta_spec:
+                case "auto":
+                    delta_spec = DeltaSpec.auto()
+                case "none":
+                    delta_spec = DeltaSpec.none()
+                case "try_consecutive":
+                    delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
+                case "try_lookback":
+                    delta_spec = DeltaSpec.try_lookback()
+                case _:
+                    raise ValueError(f"delta_spec {self.delta_spec} is not supported")
+
+        match self.paging_spec:
+            case "equal_pages_up_to":
+                paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
+            case _:
+                raise ValueError(f"paging_spec {self.paging_spec} is not supported")
+
+        return ChunkConfig(
+            compression_level=self.level,
+            delta_spec=delta_spec,
+            mode_spec=mode_spec,
+            paging_spec=paging_spec,
+        )
+
+    def encode(self, buf):
+        buf = ensure_contiguous_ndarray(buf)
+        config = self._get_chunk_config()
+        return standalone.simple_compress(buf, config)
+
+    def decode(self, buf, out=None):
+        if out is not None:
+            out = ensure_contiguous_ndarray(out)
+            standalone.simple_decompress_into(buf, out)
+            return out
+        else:
+            return standalone.simple_decompress(buf)
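
A hedged usage sketch of the constructor options above, assuming the optional pcodec dependency is installed; the choice of delta_spec="try_consecutive" with order 2 is just an illustration for smooth, monotonic data:

import numpy as np
from numcodecs.pcodec import PCodec

codec = PCodec(level=8, delta_spec="try_consecutive", delta_encoding_order=2)

# a slowly varying float64 series, the kind of input pco compresses well
x = np.cumsum(np.random.rand(10_000))

enc = codec.encode(x)

# decode into a preallocated array, or omit `out` to let pcodec allocate one
out = np.empty_like(x)
codec.decode(enc, out=out)
assert np.array_equal(x, out)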

numcodecs/pickles.py
ADDED
@@ -0,0 +1,55 @@
+import pickle
+
+import numpy as np
+
+from .abc import Codec
+from .compat import ensure_contiguous_ndarray
+
+
+class Pickle(Codec):
+    """Codec to encode data as pickled bytes. Useful for encoding an array of Python string
+    objects.
+
+    Parameters
+    ----------
+    protocol : int, defaults to pickle.HIGHEST_PROTOCOL
+        The protocol used to pickle data.
+
+    Examples
+    --------
+    >>> import numcodecs as codecs
+    >>> import numpy as np
+    >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
+    >>> f = codecs.Pickle()
+    >>> f.decode(f.encode(x))
+    array(['foo', 'bar', 'baz'], dtype=object)
+
+    See Also
+    --------
+    numcodecs.msgpacks.MsgPack
+
+    """
+
+    codec_id = 'pickle'
+
+    def __init__(self, protocol=pickle.HIGHEST_PROTOCOL):
+        self.protocol = protocol
+
+    def encode(self, buf):
+        return pickle.dumps(buf, protocol=self.protocol)
+
+    def decode(self, buf, out=None):
+        buf = ensure_contiguous_ndarray(buf)
+        dec = pickle.loads(buf)
+
+        if out is not None:
+            np.copyto(out, dec)
+            return out
+        else:
+            return dec
+
+    def get_config(self):
+        return {'id': self.codec_id, 'protocol': self.protocol}
+
+    def __repr__(self):
+        return f'Pickle(protocol={self.protocol})'
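
Since the only state is the pickle protocol, the config round trip through the registry is trivial. A short sketch, assuming numcodecs and numpy are installed (and, as with pickle generally, only buffers from trusted sources should be decoded):

import pickle
import numcodecs
import numpy as np

codec = numcodecs.Pickle(protocol=pickle.HIGHEST_PROTOCOL)

# config carries only the id and the protocol number, e.g. {'id': 'pickle', 'protocol': 5}
config = codec.get_config()
codec2 = numcodecs.get_codec(config)

x = np.array(['foo', 'bar', None], dtype=object)
assert np.array_equal(codec2.decode(codec.encode(x)), x)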

numcodecs/quantize.py
ADDED
@@ -0,0 +1,98 @@
+import math
+
+import numpy as np
+
+from .abc import Codec
+from .compat import ensure_ndarray, ndarray_copy
+
+
+class Quantize(Codec):
+    """Lossy filter to reduce the precision of floating point data.
+
+    Parameters
+    ----------
+    digits : int
+        Desired precision (number of decimal digits).
+    dtype : dtype
+        Data type to use for decoded data.
+    astype : dtype, optional
+        Data type to use for encoded data.
+
+    Examples
+    --------
+    >>> import numcodecs
+    >>> import numpy as np
+    >>> x = np.linspace(0, 1, 10, dtype='f8')
+    >>> x
+    array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
+           0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])
+    >>> codec = numcodecs.Quantize(digits=1, dtype='f8')
+    >>> codec.encode(x)
+    array([0.    , 0.125 , 0.25  , 0.3125, 0.4375, 0.5625, 0.6875,
+           0.75  , 0.875 , 1.    ])
+    >>> codec = numcodecs.Quantize(digits=2, dtype='f8')
+    >>> codec.encode(x)
+    array([0.       , 0.109375 , 0.21875  , 0.3359375, 0.4453125,
+           0.5546875, 0.6640625, 0.78125  , 0.890625 , 1.       ])
+    >>> codec = numcodecs.Quantize(digits=3, dtype='f8')
+    >>> codec.encode(x)
+    array([0.        , 0.11132812, 0.22265625, 0.33300781, 0.44433594,
+           0.55566406, 0.66699219, 0.77734375, 0.88867188, 1.        ])
+
+    See Also
+    --------
+    numcodecs.fixedscaleoffset.FixedScaleOffset
+
+    """
+
+    codec_id = 'quantize'
+
+    def __init__(self, digits, dtype, astype=None):
+        self.digits = digits
+        self.dtype = np.dtype(dtype)
+        if astype is None:
+            self.astype = self.dtype
+        else:
+            self.astype = np.dtype(astype)
+        if self.dtype.kind != 'f' or self.astype.kind != 'f':
+            raise ValueError('only floating point data types are supported')
+
+    def encode(self, buf):
+        # normalise input
+        arr = ensure_ndarray(buf).view(self.dtype)
+
+        # apply scaling
+        precision = 10.0**-self.digits
+        exp = math.log10(precision)
+        if exp < 0:
+            exp = math.floor(exp)
+        else:
+            exp = math.ceil(exp)
+        bits = math.ceil(math.log2(10.0**-exp))
+        scale = 2.0**bits
+        enc = np.around(scale * arr) / scale
+
+        # cast dtype
+        return enc.astype(self.astype, copy=False)
+
+    def decode(self, buf, out=None):
+        # filter is lossy, decoding is no-op
+        dec = ensure_ndarray(buf).view(self.astype)
+        dec = dec.astype(self.dtype, copy=False)
+        return ndarray_copy(dec, out)
+
+    def get_config(self):
+        # override to handle encoding dtypes
+        return {
+            'id': self.codec_id,
+            'digits': self.digits,
+            'dtype': self.dtype.str,
+            'astype': self.astype.str,
+        }
+
+    def __repr__(self):
+        r = f'{type(self).__name__}(digits={self.digits}, dtype={self.dtype.str!r}'
+        if self.astype != self.dtype:
+            r += f', astype={self.astype.str!r}'
+        r += ')'
+        return r
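
To make the rounding in encode() concrete: for digits=1, precision is 0.1, exp is -1, bits = ceil(log2(10)) = 4, and scale = 2**4 = 16, so every value is snapped to the nearest multiple of 1/16 = 0.0625, which is exactly what the digits=1 example in the docstring shows (0.125, 0.25, 0.3125, ...). A small sketch reproducing that arithmetic, assuming numpy is installed:

import math
import numpy as np

digits = 1
precision = 10.0 ** -digits                 # 0.1
exp = math.floor(math.log10(precision))     # -1 (negative, so floor)
bits = math.ceil(math.log2(10.0 ** -exp))   # ceil(log2(10)) == 4
scale = 2.0 ** bits                         # 16.0

x = np.linspace(0, 1, 10)
enc = np.around(scale * x) / scale          # values snap to multiples of 1/16
assert np.all(np.abs(enc - x) <= 0.5 / scale)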