numcodecs-0.16.4-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. numcodecs/__init__.py +146 -0
  2. numcodecs/_shuffle.cpython-313-darwin.so +0 -0
  3. numcodecs/abc.py +126 -0
  4. numcodecs/astype.py +72 -0
  5. numcodecs/base64.py +26 -0
  6. numcodecs/bitround.py +80 -0
  7. numcodecs/blosc.cpython-313-darwin.so +0 -0
  8. numcodecs/bz2.py +45 -0
  9. numcodecs/categorize.py +98 -0
  10. numcodecs/checksum32.py +189 -0
  11. numcodecs/compat.py +206 -0
  12. numcodecs/compat_ext.cpython-313-darwin.so +0 -0
  13. numcodecs/delta.py +94 -0
  14. numcodecs/errors.py +26 -0
  15. numcodecs/fixedscaleoffset.py +130 -0
  16. numcodecs/fletcher32.cpython-313-darwin.so +0 -0
  17. numcodecs/gzip.py +50 -0
  18. numcodecs/jenkins.cpython-313-darwin.so +0 -0
  19. numcodecs/json.py +107 -0
  20. numcodecs/lz4.cpython-313-darwin.so +0 -0
  21. numcodecs/lzma.py +71 -0
  22. numcodecs/msgpacks.py +86 -0
  23. numcodecs/ndarray_like.py +65 -0
  24. numcodecs/packbits.py +82 -0
  25. numcodecs/pcodec.py +119 -0
  26. numcodecs/pickles.py +55 -0
  27. numcodecs/quantize.py +98 -0
  28. numcodecs/registry.py +74 -0
  29. numcodecs/shuffle.py +61 -0
  30. numcodecs/tests/__init__.py +3 -0
  31. numcodecs/tests/common.py +275 -0
  32. numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
  33. numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
  34. numcodecs/tests/test_astype.py +74 -0
  35. numcodecs/tests/test_base64.py +81 -0
  36. numcodecs/tests/test_bitround.py +81 -0
  37. numcodecs/tests/test_blosc.py +290 -0
  38. numcodecs/tests/test_bz2.py +66 -0
  39. numcodecs/tests/test_categorize.py +87 -0
  40. numcodecs/tests/test_checksum32.py +199 -0
  41. numcodecs/tests/test_compat.py +111 -0
  42. numcodecs/tests/test_delta.py +61 -0
  43. numcodecs/tests/test_entrypoints.py +24 -0
  44. numcodecs/tests/test_entrypoints_backport.py +36 -0
  45. numcodecs/tests/test_fixedscaleoffset.py +77 -0
  46. numcodecs/tests/test_fletcher32.py +56 -0
  47. numcodecs/tests/test_gzip.py +110 -0
  48. numcodecs/tests/test_jenkins.py +150 -0
  49. numcodecs/tests/test_json.py +85 -0
  50. numcodecs/tests/test_lz4.py +83 -0
  51. numcodecs/tests/test_lzma.py +94 -0
  52. numcodecs/tests/test_msgpacks.py +126 -0
  53. numcodecs/tests/test_ndarray_like.py +48 -0
  54. numcodecs/tests/test_packbits.py +39 -0
  55. numcodecs/tests/test_pcodec.py +90 -0
  56. numcodecs/tests/test_pickles.py +61 -0
  57. numcodecs/tests/test_pyzstd.py +76 -0
  58. numcodecs/tests/test_quantize.py +76 -0
  59. numcodecs/tests/test_registry.py +43 -0
  60. numcodecs/tests/test_shuffle.py +166 -0
  61. numcodecs/tests/test_vlen_array.py +97 -0
  62. numcodecs/tests/test_vlen_bytes.py +93 -0
  63. numcodecs/tests/test_vlen_utf8.py +91 -0
  64. numcodecs/tests/test_zarr3.py +48 -0
  65. numcodecs/tests/test_zarr3_import.py +13 -0
  66. numcodecs/tests/test_zfpy.py +104 -0
  67. numcodecs/tests/test_zlib.py +94 -0
  68. numcodecs/tests/test_zstd.py +189 -0
  69. numcodecs/version.py +34 -0
  70. numcodecs/vlen.cpython-313-darwin.so +0 -0
  71. numcodecs/zarr3.py +67 -0
  72. numcodecs/zfpy.py +112 -0
  73. numcodecs/zlib.py +42 -0
  74. numcodecs/zstd.cpython-313-darwin.so +0 -0
  75. numcodecs-0.16.4.dist-info/METADATA +67 -0
  76. numcodecs-0.16.4.dist-info/RECORD +87 -0
  77. numcodecs-0.16.4.dist-info/WHEEL +6 -0
  78. numcodecs-0.16.4.dist-info/licenses/LICENSE.txt +21 -0
  79. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSE.txt +31 -0
  80. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/BITSHUFFLE.txt +21 -0
  81. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/FASTLZ.txt +20 -0
  82. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/LZ4.txt +25 -0
  83. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/SNAPPY.txt +28 -0
  84. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/STDINT.txt +29 -0
  85. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB-NG.txt +17 -0
  86. numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB.txt +22 -0
  87. numcodecs-0.16.4.dist-info/top_level.txt +1 -0
numcodecs/json.py ADDED
@@ -0,0 +1,107 @@
+ import json as _json
+ import textwrap
+
+ import numpy as np
+
+ from .abc import Codec
+ from .compat import ensure_text
+
+
+ class JSON(Codec):
+     """Codec to encode data as JSON. Useful for encoding an array of Python objects.
+
+     .. versionchanged:: 0.6
+        The encoding format has been changed to include the array shape in the encoded
+        data, which ensures that all object arrays can be correctly encoded and decoded.
+
+     Examples
+     --------
+     >>> import numcodecs
+     >>> import numpy as np
+     >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
+     >>> codec = numcodecs.JSON()
+     >>> codec.decode(codec.encode(x))
+     array(['foo', 'bar', 'baz'], dtype=object)
+
+     See Also
+     --------
+     numcodecs.pickles.Pickle, numcodecs.msgpacks.MsgPack
+
+     """
+
+     codec_id = 'json2'
+
+     def __init__(
+         self,
+         encoding='utf-8',
+         skipkeys=False,
+         ensure_ascii=True,
+         check_circular=True,
+         allow_nan=True,
+         sort_keys=True,
+         indent=None,
+         separators=None,
+         strict=True,
+     ):
+         self._text_encoding = encoding
+         if separators is None:
+             # ensure separators are explicitly specified, and consistent behaviour across
+             # Python versions, and most compact representation if indent is None
+             if indent is None:
+                 separators = ',', ':'
+             else:
+                 separators = ', ', ': '
+         separators = tuple(separators)
+         self._encoder_config = {
+             'skipkeys': skipkeys,
+             'ensure_ascii': ensure_ascii,
+             'check_circular': check_circular,
+             'allow_nan': allow_nan,
+             'indent': indent,
+             'separators': separators,
+             'sort_keys': sort_keys,
+         }
+         self._encoder = _json.JSONEncoder(**self._encoder_config)
+         self._decoder_config = {'strict': strict}
+         self._decoder = _json.JSONDecoder(**self._decoder_config)
+
+     def encode(self, buf):
+         try:
+             buf = np.asarray(buf)
+         except ValueError:  # pragma: no cover
+             buf = np.asarray(buf, dtype=object)
+         items = np.atleast_1d(buf).tolist()
+         items.append(buf.dtype.str)
+         items.append(buf.shape)
+         return self._encoder.encode(items).encode(self._text_encoding)
+
+     def decode(self, buf, out=None):
+         items = self._decoder.decode(ensure_text(buf, self._text_encoding))
+         dec = np.empty(items[-1], dtype=items[-2])
+         if not items[-1]:
+             dec[...] = items[0]
+         else:
+             dec[:] = items[:-2]
+         if out is not None:
+             np.copyto(out, dec)
+             return out
+         else:
+             return dec
+
+     def get_config(self):
+         config = {'id': self.codec_id, 'encoding': self._text_encoding}
+         config.update(self._encoder_config)
+         config.update(self._decoder_config)
+         return config
+
+     def __repr__(self):
+         params = [f'encoding={self._text_encoding!r}']
+         for k, v in sorted(self._encoder_config.items()):
+             params.append(f'{k}={v!r}')
+         for k, v in sorted(self._decoder_config.items()):
+             params.append(f'{k}={v!r}')
+         classname = type(self).__name__
+         params = ', '.join(params)
+         return textwrap.fill(
+             f'{classname}({params})', width=80, break_long_words=False, subsequent_indent=' '
+         )
numcodecs/lz4.cpython-313-darwin.so ADDED
Binary file
numcodecs/lzma.py ADDED
@@ -0,0 +1,71 @@
+ from types import ModuleType
+
+ _lzma: ModuleType | None = None
+ try:
+     import lzma as _lzma
+ except ImportError:  # pragma: no cover
+     try:  # noqa: SIM105
+         from backports import lzma as _lzma  # type: ignore[no-redef]
+     except ImportError:
+         pass
+
+
+ if _lzma:
+     from .abc import Codec
+     from .compat import ensure_contiguous_ndarray, ndarray_copy
+
+     # noinspection PyShadowingBuiltins
+     class LZMA(Codec):
+         """Codec providing compression using lzma via the Python standard
+         library.
+
+         Parameters
+         ----------
+         format : integer, optional
+             One of the lzma format codes, e.g., ``lzma.FORMAT_XZ``.
+         check : integer, optional
+             One of the lzma check codes, e.g., ``lzma.CHECK_NONE``.
+         preset : integer, optional
+             An integer between 0 and 9 inclusive, specifying the compression
+             level.
+         filters : list, optional
+             A list of dictionaries specifying compression filters. If
+             filters are provided, 'preset' must be None.
+
+         """
+
+         codec_id = 'lzma'
+
+         def __init__(self, format=1, check=-1, preset=None, filters=None):
+             self.format = format
+             self.check = check
+             self.preset = preset
+             self.filters = filters
+
+         def encode(self, buf):
+             # normalise inputs
+             buf = ensure_contiguous_ndarray(buf)
+
+             # do compression
+             return _lzma.compress(
+                 buf,
+                 format=self.format,
+                 check=self.check,
+                 preset=self.preset,
+                 filters=self.filters,
+             )
+
+         def decode(self, buf, out=None):
+             # normalise inputs
+             buf = ensure_contiguous_ndarray(buf)
+             if out is not None:
+                 out = ensure_contiguous_ndarray(out)
+
+             # do decompression
+             dec = _lzma.decompress(buf, format=self.format, filters=self.filters)
+
+             # handle destination
+             return ndarray_copy(dec, out)
+
+         def __repr__(self):
+             return f'{type(self).__name__}(format={self.format!r}, check={self.check!r}, preset={self.preset!r}, filters={self.filters!r})'
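
A minimal round-trip with the LZMA wrapper above, assuming the standard-library lzma module is available and that `decode` with `out=None` returns the raw decompressed buffer (as the `ndarray_copy` call suggests). Not part of the package.

# Hypothetical usage sketch for the LZMA codec above.
import numpy as np
from numcodecs.lzma import LZMA

codec = LZMA()                       # default format=1 corresponds to lzma.FORMAT_XZ
data = np.arange(1000, dtype='i4')
compressed = codec.encode(data)      # xz-compressed bytes
restored = np.frombuffer(codec.decode(compressed), dtype='i4')
np.testing.assert_array_equal(data, restored)
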
numcodecs/msgpacks.py ADDED
@@ -0,0 +1,86 @@
+ import msgpack
+ import numpy as np
+
+ from .abc import Codec
+ from .compat import ensure_contiguous_ndarray
+
+
+ class MsgPack(Codec):
+     """Codec to encode data as msgpacked bytes. Useful for encoding an array of Python
+     objects.
+
+     .. versionchanged:: 0.6
+        The encoding format has been changed to include the array shape in the encoded
+        data, which ensures that all object arrays can be correctly encoded and decoded.
+
+     Parameters
+     ----------
+     use_single_float : bool, optional
+         Use single precision float type for float.
+     use_bin_type : bool, optional
+         Use bin type introduced in msgpack spec 2.0 for bytes. It also enables str8 type
+         for unicode.
+     raw : bool, optional
+         If true, unpack msgpack raw to Python bytes. Otherwise, unpack to Python str
+         by decoding with UTF-8 encoding.
+
+     Examples
+     --------
+     >>> import numcodecs
+     >>> import numpy as np
+     >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
+     >>> codec = numcodecs.MsgPack()
+     >>> codec.decode(codec.encode(x))
+     array(['foo', 'bar', 'baz'], dtype=object)
+
+     See Also
+     --------
+     numcodecs.pickles.Pickle, numcodecs.json.JSON, numcodecs.vlen.VLenUTF8
+
+     Notes
+     -----
+     Requires `msgpack <https://pypi.org/project/msgpack/>`_ to be installed.
+
+     """
+
+     codec_id = 'msgpack2'
+
+     def __init__(self, use_single_float=False, use_bin_type=True, raw=False):
+         self.use_single_float = use_single_float
+         self.use_bin_type = use_bin_type
+         self.raw = raw
+
+     def encode(self, buf):
+         try:
+             buf = np.asarray(buf)
+         except ValueError:
+             buf = np.asarray(buf, dtype=object)
+         items = buf.tolist()
+         items.extend((buf.dtype.str, buf.shape))
+         return msgpack.packb(
+             items,
+             use_bin_type=self.use_bin_type,
+             use_single_float=self.use_single_float,
+         )
+
+     def decode(self, buf, out=None):
+         buf = ensure_contiguous_ndarray(buf)
+         items = msgpack.unpackb(buf, raw=self.raw)
+         dec = np.empty(items[-1], dtype=items[-2])
+         dec[:] = items[:-2]
+         if out is not None:
+             np.copyto(out, dec)
+             return out
+         else:
+             return dec
+
+     def get_config(self):
+         return {
+             'id': self.codec_id,
+             'raw': self.raw,
+             'use_single_float': self.use_single_float,
+             'use_bin_type': self.use_bin_type,
+         }
+
+     def __repr__(self):
+         return f'MsgPack(raw={self.raw!r}, use_bin_type={self.use_bin_type!r}, use_single_float={self.use_single_float!r})'
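
Like the JSON codec, MsgPack appends the dtype string and shape to the packed item list. A small sketch (not part of the package, requires msgpack) showing a mixed object array round-trip:

# Hypothetical usage sketch for the MsgPack codec above.
import numpy as np
from numcodecs.msgpacks import MsgPack

codec = MsgPack()
x = np.array([b'ab', 'cd', 3], dtype=object)   # mixed Python objects
packed = codec.encode(x)                       # msgpack bytes: items + dtype.str + shape
y = codec.decode(packed)
assert y.shape == x.shape and y.dtype == object
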
numcodecs/ndarray_like.py ADDED
@@ -0,0 +1,65 @@
+ from typing import Any, ClassVar, Protocol, runtime_checkable
+
+
+ class _CachedProtocolMeta(Protocol.__class__):  # type: ignore[name-defined]
+     """Custom implementation of @runtime_checkable
+
+     The native implementation of @runtime_checkable is slow,
+     see <https://github.com/zarr-developers/numcodecs/issues/379>.
+
+     This metaclass keeps an unbounded cache of the result of
+     isinstance checks using the object's class as the cache key.
+     """
+
+     _instancecheck_cache: ClassVar[dict[tuple[type, type], bool]] = {}
+
+     def __instancecheck__(self, instance):
+         key = (self, instance.__class__)
+         ret = self._instancecheck_cache.get(key)
+         if ret is None:
+             ret = super().__instancecheck__(instance)
+             self._instancecheck_cache[key] = ret
+         return ret
+
+
+ @runtime_checkable
+ class DType(Protocol, metaclass=_CachedProtocolMeta):
+     itemsize: int
+     name: str
+     kind: str
+
+
+ @runtime_checkable
+ class FlagsObj(Protocol, metaclass=_CachedProtocolMeta):
+     c_contiguous: bool
+     f_contiguous: bool
+     owndata: bool
+
+
+ @runtime_checkable
+ class NDArrayLike(Protocol, metaclass=_CachedProtocolMeta):
+     dtype: DType
+     shape: tuple[int, ...]
+     strides: tuple[int, ...]
+     ndim: int
+     size: int
+     itemsize: int
+     nbytes: int
+     flags: FlagsObj
+
+     def __len__(self) -> int: ...  # pragma: no cover
+
+     def __getitem__(self, key) -> Any: ...  # pragma: no cover
+
+     def __setitem__(self, key, value): ...  # pragma: no cover
+
+     def tobytes(self, order: str | None = ...) -> bytes: ...  # pragma: no cover
+
+     def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": ...  # pragma: no cover
+
+     def view(self, dtype: DType = ...) -> "NDArrayLike": ...  # pragma: no cover
+
+
+ def is_ndarray_like(obj: object) -> bool:
+     """Return True when `obj` is ndarray-like"""
+     return isinstance(obj, NDArrayLike)
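
Because NDArrayLike is a runtime-checkable structural protocol, any object exposing the listed attributes and methods passes the check; a minimal illustration (not part of the package):

# Hypothetical check using the protocol above.
import numpy as np
from numcodecs.ndarray_like import is_ndarray_like

assert is_ndarray_like(np.zeros(3))     # ndarrays expose dtype, shape, flags, reshape, ...
assert not is_ndarray_like([0.0, 1.0])  # plain lists lack those attributes
# Repeated checks against the same class are answered from the
# _CachedProtocolMeta cache instead of re-running the protocol inspection.
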
numcodecs/packbits.py ADDED
@@ -0,0 +1,82 @@
+ import numpy as np
+
+ from .abc import Codec
+ from .compat import ensure_ndarray, ndarray_copy
+
+
+ class PackBits(Codec):
+     """Codec to pack elements of a boolean array into bits in a uint8 array.
+
+     Examples
+     --------
+     >>> import numcodecs
+     >>> import numpy as np
+     >>> codec = numcodecs.PackBits()
+     >>> x = np.array([True, False, False, True], dtype=bool)
+     >>> y = codec.encode(x)
+     >>> y
+     array([  4, 144], dtype=uint8)
+     >>> z = codec.decode(y)
+     >>> z
+     array([ True, False, False,  True])
+
+     Notes
+     -----
+     The first element of the encoded array stores the number of bits that
+     were padded to complete the final byte.
+
+     """
+
+     codec_id = 'packbits'
+
+     def encode(self, buf):
+         # normalise input
+         arr = ensure_ndarray(buf).view(bool)
+
+         # flatten to simplify implementation
+         arr = arr.reshape(-1, order='A')
+
+         # determine size of packed data
+         n = arr.size
+         n_bytes_packed = n // 8
+         n_bits_leftover = n % 8
+         if n_bits_leftover > 0:
+             n_bytes_packed += 1
+
+         # setup output
+         enc = np.empty(n_bytes_packed + 1, dtype='u1')
+
+         # store how many bits were padded
+         if n_bits_leftover:
+             n_bits_padded = 8 - n_bits_leftover
+         else:
+             n_bits_padded = 0
+         enc[0] = n_bits_padded
+
+         # apply encoding
+         enc[1:] = np.packbits(arr)
+
+         return enc
+
+     def decode(self, buf, out=None):
+         # normalise input
+         enc = ensure_ndarray(buf).view('u1')
+
+         # flatten to simplify implementation
+         enc = enc.reshape(-1, order='A')
+
+         # find out how many bits were padded
+         n_bits_padded = int(enc[0])
+
+         # apply decoding
+         dec = np.unpackbits(enc[1:])
+
+         # remove padded bits
+         if n_bits_padded:
+             dec = dec[:-n_bits_padded]
+
+         # view as boolean array
+         dec = dec.view(bool)
+
+         # handle destination
+         return ndarray_copy(dec, out)
+
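
The encoded layout is one header byte (number of padding bits) followed by the `np.packbits` output. A worked check of the docstring example, written as a standalone illustration:

# Worked example of the PackBits layout described above.
import numpy as np
from numcodecs import PackBits

x = np.array([True, False, False, True])
enc = PackBits().encode(x)
# 4 bits of padding were needed to fill the final byte, so enc[0] == 4;
# the packed bits 1001_0000 give enc[1] == 0b10010000 == 144.
assert enc.tolist() == [4, 144]
assert PackBits().decode(enc).tolist() == [True, False, False, True]
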
numcodecs/pcodec.py ADDED
@@ -0,0 +1,119 @@
+ from typing import Literal
+
+ from numcodecs.abc import Codec
+ from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
+ from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
+
+ DEFAULT_MAX_PAGE_N = 262144
+
+
+ class PCodec(Codec):
+     """
+     PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
+     numerical sequences with high compression ratio and fast speed.
+
+     See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.
+
+     PCodec supports only the following numerical dtypes: uint16, uint32, uint64,
+     int16, int32, int64, float16, float32, and float64.
+
+     Parameters
+     ----------
+     level : int
+         A compression level from 0-12, where 12 takes the longest and compresses
+         the most.
+     mode_spec : {"auto", "classic"}
+         Configures whether PCodec should try to infer the best "mode" or
+         structure of the data (e.g. approximate multiples of 0.1) to improve
+         compression ratio, or skip this step and just use the numbers as-is
+         (Classic mode). Note that the "try*" specs are not currently supported.
+     delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
+         Configures the delta encoding strategy. By default, uses "auto", which
+         will try to infer the best encoding order.
+     paging_spec : {"equal_pages_up_to"}
+         Configures the paging strategy. Only "equal_pages_up_to" is currently
+         supported.
+     delta_encoding_order : int or None
+         Explicit delta encoding level from 0-7. Only valid if delta_spec is
+         "try_consecutive" or "auto" (to support backwards compatibility with
+         older versions of this codec).
+     equal_pages_up_to : int
+         Divide the chunk into equal pages of up to this many numbers.
+     """
+
+     codec_id = "pcodec"
+
+     def __init__(
+         self,
+         level: int = 8,
+         *,
+         mode_spec: Literal["auto", "classic"] = "auto",
+         delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
+         paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
+         delta_encoding_order: int | None = None,
+         equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
+     ):
+         # note that we use `level` instead of `compression_level` to
+         # match other codecs
+         self.level = level
+         self.mode_spec = mode_spec
+         self.delta_spec = delta_spec
+         self.paging_spec = paging_spec
+         self.delta_encoding_order = delta_encoding_order
+         self.equal_pages_up_to = equal_pages_up_to
+
+     def _get_chunk_config(self):
+         match self.mode_spec:
+             case "auto":
+                 mode_spec = ModeSpec.auto()
+             case "classic":
+                 mode_spec = ModeSpec.classic()
+             case _:
+                 raise ValueError(f"mode_spec {self.mode_spec} is not supported")
+
+         if self.delta_encoding_order is not None and self.delta_spec == "auto":
+             # backwards compat for before delta_spec was introduced
+             delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
+         elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
+             raise ValueError(
+                 "delta_encoding_order can only be set for delta_spec='try_consecutive'"
+             )
+         else:
+             match self.delta_spec:
+                 case "auto":
+                     delta_spec = DeltaSpec.auto()
+                 case "none":
+                     delta_spec = DeltaSpec.none()
+                 case "try_consecutive":
+                     delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
+                 case "try_lookback":
+                     delta_spec = DeltaSpec.try_lookback()
+                 case _:
+                     raise ValueError(f"delta_spec {self.delta_spec} is not supported")
+
+         match self.paging_spec:
+             case "equal_pages_up_to":
+                 paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
+             case _:
+                 raise ValueError(f"paging_spec {self.paging_spec} is not supported")
+
+         return ChunkConfig(
+             compression_level=self.level,
+             delta_spec=delta_spec,
+             mode_spec=mode_spec,
+             paging_spec=paging_spec,
+         )
+
+     def encode(self, buf):
+         buf = ensure_contiguous_ndarray(buf)
+         config = self._get_chunk_config()
+         return standalone.simple_compress(buf, config)
+
+     def decode(self, buf, out=None):
+         buf = ensure_bytes(buf)
+         if out is not None:
+             out = ensure_contiguous_ndarray(out)
+             standalone.simple_decompress_into(buf, out)
+             return out
+         else:
+             return standalone.simple_decompress(buf)
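
A hedged round-trip sketch for the wrapper above. It requires the optional pcodec package and assumes that `standalone.simple_decompress` returns an array of the original dtype, which is what the wrapper's own `decode` relies on; not part of the wheel.

# Hypothetical usage sketch for the PCodec codec above.
import numpy as np
from numcodecs.pcodec import PCodec

codec = PCodec()                           # level=8, "auto" mode/delta specs
data = np.arange(10_000, dtype=np.int64)   # consecutive deltas compress very well
compressed = codec.encode(data)
restored = codec.decode(compressed)
np.testing.assert_array_equal(data, restored)
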
numcodecs/pickles.py ADDED
@@ -0,0 +1,55 @@
+ import pickle
+
+ import numpy as np
+
+ from .abc import Codec
+ from .compat import ensure_contiguous_ndarray
+
+
+ class Pickle(Codec):
+     """Codec to encode data as pickled bytes. Useful for encoding an array of Python string
+     objects.
+
+     Parameters
+     ----------
+     protocol : int, defaults to pickle.HIGHEST_PROTOCOL
+         The protocol used to pickle data.
+
+     Examples
+     --------
+     >>> import numcodecs as codecs
+     >>> import numpy as np
+     >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
+     >>> f = codecs.Pickle()
+     >>> f.decode(f.encode(x))
+     array(['foo', 'bar', 'baz'], dtype=object)
+
+     See Also
+     --------
+     numcodecs.msgpacks.MsgPack
+
+     """
+
+     codec_id = 'pickle'
+
+     def __init__(self, protocol=pickle.HIGHEST_PROTOCOL):
+         self.protocol = protocol
+
+     def encode(self, buf):
+         return pickle.dumps(buf, protocol=self.protocol)
+
+     def decode(self, buf, out=None):
+         buf = ensure_contiguous_ndarray(buf)
+         dec = pickle.loads(buf)
+
+         if out is not None:
+             np.copyto(out, dec)
+             return out
+         else:
+             return dec
+
+     def get_config(self):
+         return {'id': self.codec_id, 'protocol': self.protocol}
+
+     def __repr__(self):
+         return f'Pickle(protocol={self.protocol})'
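
A short usage sketch for the Pickle codec above (illustrative, not part of the package):

# Hypothetical usage sketch for the Pickle codec above.
import numpy as np
from numcodecs import Pickle

codec = Pickle()                           # defaults to pickle.HIGHEST_PROTOCOL
x = np.array(['foo', 'bar', 'baz'], dtype=object)
y = codec.decode(codec.encode(x))
assert y.dtype == object and list(y) == ['foo', 'bar', 'baz']
# As with pickle generally, only decode data from trusted sources.
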
numcodecs/quantize.py ADDED
@@ -0,0 +1,98 @@
+ import math
+
+ import numpy as np
+
+ from .abc import Codec
+ from .compat import ensure_ndarray, ndarray_copy
+
+
+ class Quantize(Codec):
+     """Lossy filter to reduce the precision of floating point data.
+
+     Parameters
+     ----------
+     digits : int
+         Desired precision (number of decimal digits).
+     dtype : dtype
+         Data type to use for decoded data.
+     astype : dtype, optional
+         Data type to use for encoded data.
+
+     Examples
+     --------
+     >>> import numcodecs
+     >>> import numpy as np
+     >>> x = np.linspace(0, 1, 10, dtype='f8')
+     >>> x
+     array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
+            0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])
+     >>> codec = numcodecs.Quantize(digits=1, dtype='f8')
+     >>> codec.encode(x)
+     array([0.    , 0.125 , 0.25  , 0.3125, 0.4375, 0.5625, 0.6875,
+            0.75  , 0.875 , 1.    ])
+     >>> codec = numcodecs.Quantize(digits=2, dtype='f8')
+     >>> codec.encode(x)
+     array([0.       , 0.109375 , 0.21875  , 0.3359375, 0.4453125,
+            0.5546875, 0.6640625, 0.78125  , 0.890625 , 1.       ])
+     >>> codec = numcodecs.Quantize(digits=3, dtype='f8')
+     >>> codec.encode(x)
+     array([0.        , 0.11132812, 0.22265625, 0.33300781, 0.44433594,
+            0.55566406, 0.66699219, 0.77734375, 0.88867188, 1.        ])
+
+     See Also
+     --------
+     numcodecs.fixedscaleoffset.FixedScaleOffset
+
+     """
+
+     codec_id = 'quantize'
+
+     def __init__(self, digits, dtype, astype=None):
+         self.digits = digits
+         self.dtype = np.dtype(dtype)
+         if astype is None:
+             self.astype = self.dtype
+         else:
+             self.astype = np.dtype(astype)
+         if self.dtype.kind != 'f' or self.astype.kind != 'f':
+             raise ValueError('only floating point data types are supported')
+
+     def encode(self, buf):
+         # normalise input
+         arr = ensure_ndarray(buf).view(self.dtype)
+
+         # apply scaling
+         precision = 10.0**-self.digits
+         exp = math.log10(precision)
+         if exp < 0:
+             exp = math.floor(exp)
+         else:
+             exp = math.ceil(exp)
+         bits = math.ceil(math.log2(10.0**-exp))
+         scale = 2.0**bits
+         enc = np.around(scale * arr) / scale
+
+         # cast dtype
+         return enc.astype(self.astype, copy=False)
+
+     def decode(self, buf, out=None):
+         # filter is lossy, decoding is no-op
+         dec = ensure_ndarray(buf).view(self.astype)
+         dec = dec.astype(self.dtype, copy=False)
+         return ndarray_copy(dec, out)
+
+     def get_config(self):
+         # override to handle encoding dtypes
+         return {
+             'id': self.codec_id,
+             'digits': self.digits,
+             'dtype': self.dtype.str,
+             'astype': self.astype.str,
+         }
+
+     def __repr__(self):
+         r = f'{type(self).__name__}(digits={self.digits}, dtype={self.dtype.str!r}'
+         if self.astype != self.dtype:
+             r += f', astype={self.astype.str!r}'
+         r += ')'
+         return r
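
For orientation, the docstring's digits=1 example follows from the scale computation in `encode`: precision 0.1 gives exp = -1 (the floor branch), bits = ceil(log2(10)) = 4, so values are rounded to the nearest 1/16, which is why 0.11111111 becomes 0.125. A small standalone check of that arithmetic:

# Illustration of the scale computation in Quantize.encode above (digits=1).
import math
import numpy as np

digits = 1
exp = math.floor(math.log10(10.0**-digits))   # -1 (floor branch, since exp < 0)
bits = math.ceil(math.log2(10.0**-exp))       # ceil(log2(10)) == 4
scale = 2.0**bits                             # 16.0 -> round to nearest 1/16
assert np.around(scale * 0.11111111) / scale == 0.125
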