numcodecs 0.16.4__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- numcodecs/__init__.py +146 -0
- numcodecs/_shuffle.cpython-313-darwin.so +0 -0
- numcodecs/abc.py +126 -0
- numcodecs/astype.py +72 -0
- numcodecs/base64.py +26 -0
- numcodecs/bitround.py +80 -0
- numcodecs/blosc.cpython-313-darwin.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +98 -0
- numcodecs/checksum32.py +189 -0
- numcodecs/compat.py +206 -0
- numcodecs/compat_ext.cpython-313-darwin.so +0 -0
- numcodecs/delta.py +94 -0
- numcodecs/errors.py +26 -0
- numcodecs/fixedscaleoffset.py +130 -0
- numcodecs/fletcher32.cpython-313-darwin.so +0 -0
- numcodecs/gzip.py +50 -0
- numcodecs/jenkins.cpython-313-darwin.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-313-darwin.so +0 -0
- numcodecs/lzma.py +71 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +82 -0
- numcodecs/pcodec.py +119 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +98 -0
- numcodecs/registry.py +74 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +275 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +290 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +199 -0
- numcodecs/tests/test_compat.py +111 -0
- numcodecs/tests/test_delta.py +61 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +36 -0
- numcodecs/tests/test_fixedscaleoffset.py +77 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +94 -0
- numcodecs/tests/test_msgpacks.py +126 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +90 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_pyzstd.py +76 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +43 -0
- numcodecs/tests/test_shuffle.py +166 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +93 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zarr3.py +48 -0
- numcodecs/tests/test_zarr3_import.py +13 -0
- numcodecs/tests/test_zfpy.py +104 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +189 -0
- numcodecs/version.py +34 -0
- numcodecs/vlen.cpython-313-darwin.so +0 -0
- numcodecs/zarr3.py +67 -0
- numcodecs/zfpy.py +112 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-313-darwin.so +0 -0
- numcodecs-0.16.4.dist-info/METADATA +67 -0
- numcodecs-0.16.4.dist-info/RECORD +87 -0
- numcodecs-0.16.4.dist-info/WHEEL +6 -0
- numcodecs-0.16.4.dist-info/licenses/LICENSE.txt +21 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSE.txt +31 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/BITSHUFFLE.txt +21 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/FASTLZ.txt +20 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/LZ4.txt +25 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/SNAPPY.txt +28 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/STDINT.txt +29 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB-NG.txt +17 -0
- numcodecs-0.16.4.dist-info/licenses/c-blosc/LICENSES/ZLIB.txt +22 -0
- numcodecs-0.16.4.dist-info/top_level.txt +1 -0
numcodecs/__init__.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# ruff: noqa: E402
|
|
2
|
+
"""Numcodecs is a Python package providing buffer compression and
|
|
3
|
+
transformation codecs for use in data storage and communication
|
|
4
|
+
applications. These include:
|
|
5
|
+
|
|
6
|
+
* Compression codecs, e.g., Zlib, BZ2, LZMA, ZFPY and Blosc.
|
|
7
|
+
* Pre-compression filters, e.g., Delta, Quantize, FixedScaleOffset,
|
|
8
|
+
PackBits, Categorize.
|
|
9
|
+
* Integrity checks, e.g., CRC32, Adler32.
|
|
10
|
+
|
|
11
|
+
All codecs implement the same API, allowing codecs to be organized into
|
|
12
|
+
pipelines in a variety of ways.
|
|
13
|
+
|
|
14
|
+
If you have a question, find a bug, would like to make a suggestion or
|
|
15
|
+
contribute code, please `raise an issue on GitHub
|
|
16
|
+
<https://github.com/zarr-developers/numcodecs/issues>`_.
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import atexit
|
|
21
|
+
import multiprocessing
|
|
22
|
+
from contextlib import suppress
|
|
23
|
+
|
|
24
|
+
from numcodecs.registry import get_codec as get_codec
|
|
25
|
+
from numcodecs.registry import register_codec
|
|
26
|
+
from numcodecs.version import version as __version__ # noqa: F401
|
|
27
|
+
from numcodecs.zlib import Zlib
|
|
28
|
+
|
|
29
|
+
register_codec(Zlib)
|
|
30
|
+
|
|
31
|
+
from numcodecs.gzip import GZip
|
|
32
|
+
|
|
33
|
+
register_codec(GZip)
|
|
34
|
+
|
|
35
|
+
from numcodecs.bz2 import BZ2
|
|
36
|
+
|
|
37
|
+
register_codec(BZ2)
|
|
38
|
+
|
|
39
|
+
from numcodecs.lzma import LZMA
|
|
40
|
+
|
|
41
|
+
register_codec(LZMA)
|
|
42
|
+
|
|
43
|
+
from numcodecs import blosc
|
|
44
|
+
from numcodecs.blosc import Blosc
|
|
45
|
+
|
|
46
|
+
register_codec(Blosc)
|
|
47
|
+
# initialize blosc
try:
    ncores = multiprocessing.cpu_count()
except (NotImplementedError, OSError):  # pragma: no cover
    # cpu_count() signals "cannot determine the number of CPUs" with
    # NotImplementedError (not OSError), so both are handled here and we
    # fall back to a single core.
    ncores = 1
blosc._init()
# use at most 8 threads by default, fewer on machines with fewer cores
blosc.set_nthreads(min(8, ncores))
# run blosc._destroy when the interpreter exits
atexit.register(blosc._destroy)
|
|
55
|
+
|
|
56
|
+
from numcodecs import zstd as zstd
|
|
57
|
+
from numcodecs.zstd import Zstd
|
|
58
|
+
|
|
59
|
+
register_codec(Zstd)
|
|
60
|
+
|
|
61
|
+
from numcodecs import lz4 as lz4
|
|
62
|
+
from numcodecs.lz4 import LZ4
|
|
63
|
+
|
|
64
|
+
register_codec(LZ4)
|
|
65
|
+
|
|
66
|
+
from numcodecs.astype import AsType
|
|
67
|
+
|
|
68
|
+
register_codec(AsType)
|
|
69
|
+
|
|
70
|
+
from numcodecs.delta import Delta
|
|
71
|
+
|
|
72
|
+
register_codec(Delta)
|
|
73
|
+
|
|
74
|
+
from numcodecs.quantize import Quantize
|
|
75
|
+
|
|
76
|
+
register_codec(Quantize)
|
|
77
|
+
|
|
78
|
+
from numcodecs.fixedscaleoffset import FixedScaleOffset
|
|
79
|
+
|
|
80
|
+
register_codec(FixedScaleOffset)
|
|
81
|
+
|
|
82
|
+
from numcodecs.packbits import PackBits
|
|
83
|
+
|
|
84
|
+
register_codec(PackBits)
|
|
85
|
+
|
|
86
|
+
from numcodecs.categorize import Categorize
|
|
87
|
+
|
|
88
|
+
register_codec(Categorize)
|
|
89
|
+
|
|
90
|
+
from numcodecs.pickles import Pickle
|
|
91
|
+
|
|
92
|
+
register_codec(Pickle)
|
|
93
|
+
|
|
94
|
+
from numcodecs.base64 import Base64
|
|
95
|
+
|
|
96
|
+
register_codec(Base64)
|
|
97
|
+
|
|
98
|
+
from numcodecs.shuffle import Shuffle
|
|
99
|
+
|
|
100
|
+
register_codec(Shuffle)
|
|
101
|
+
|
|
102
|
+
from numcodecs.bitround import BitRound
|
|
103
|
+
|
|
104
|
+
register_codec(BitRound)
|
|
105
|
+
|
|
106
|
+
from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3
|
|
107
|
+
|
|
108
|
+
register_codec(CRC32)
|
|
109
|
+
register_codec(Adler32)
|
|
110
|
+
register_codec(JenkinsLookup3)
|
|
111
|
+
|
|
112
|
+
from numcodecs.json import JSON
|
|
113
|
+
|
|
114
|
+
register_codec(JSON)
|
|
115
|
+
|
|
116
|
+
from numcodecs import vlen as vlen
|
|
117
|
+
from numcodecs.vlen import VLenArray, VLenBytes, VLenUTF8
|
|
118
|
+
|
|
119
|
+
register_codec(VLenUTF8)
|
|
120
|
+
register_codec(VLenBytes)
|
|
121
|
+
register_codec(VLenArray)
|
|
122
|
+
|
|
123
|
+
from numcodecs.fletcher32 import Fletcher32
|
|
124
|
+
|
|
125
|
+
register_codec(Fletcher32)
|
|
126
|
+
|
|
127
|
+
# Optional dependencies
|
|
128
|
+
with suppress(ImportError):
|
|
129
|
+
from numcodecs.zfpy import ZFPY
|
|
130
|
+
|
|
131
|
+
register_codec(ZFPY)
|
|
132
|
+
|
|
133
|
+
with suppress(ImportError):
|
|
134
|
+
from numcodecs.msgpacks import MsgPack
|
|
135
|
+
|
|
136
|
+
register_codec(MsgPack)
|
|
137
|
+
|
|
138
|
+
with suppress(ImportError):
|
|
139
|
+
from numcodecs.checksum32 import CRC32C
|
|
140
|
+
|
|
141
|
+
register_codec(CRC32C)
|
|
142
|
+
|
|
143
|
+
with suppress(ImportError):
|
|
144
|
+
from numcodecs.pcodec import PCodec
|
|
145
|
+
|
|
146
|
+
register_codec(PCodec)
|
|
Binary file
|
numcodecs/abc.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""This module defines the :class:`Codec` base class, a common interface for
|
|
2
|
+
all codec classes.
|
|
3
|
+
|
|
4
|
+
Codec classes must implement :func:`Codec.encode` and :func:`Codec.decode`
|
|
5
|
+
methods. Inputs to and outputs from these methods may be any Python object
|
|
6
|
+
exporting a contiguous buffer via the new-style Python protocol.
|
|
7
|
+
|
|
8
|
+
Codec classes must implement a :func:`Codec.get_config` method,
|
|
9
|
+
which must return a dictionary holding all configuration parameters
|
|
10
|
+
required to enable encoding and decoding of data. The expectation is that
|
|
11
|
+
these configuration parameters will be stored or communicated separately
|
|
12
|
+
from encoded data, and thus the codecs do not need to store all encoding
|
|
13
|
+
parameters within the encoded data. For broad compatibility,
|
|
14
|
+
the configuration object must contain only JSON-serializable values. The
|
|
15
|
+
configuration object must also contain an 'id' field storing the codec
|
|
16
|
+
identifier (see below).
|
|
17
|
+
|
|
18
|
+
Codec classes must implement a :func:`Codec.from_config` class method,
|
|
19
|
+
which will return an instance of the class initialized from a configuration
|
|
20
|
+
object.
|
|
21
|
+
|
|
22
|
+
Finally, codec classes must set a `codec_id` class-level attribute. This
|
|
23
|
+
must be a string. Two different codec classes may set the same value for the
|
|
24
|
+
`codec_id` attribute if and only if they are fully compatible, meaning that
|
|
25
|
+
(1) configuration parameters are the same, and (2) given the same
|
|
26
|
+
configuration, one class could correctly decode data encoded by the
|
|
27
|
+
other and vice versa.
|
|
28
|
+
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from abc import ABC, abstractmethod
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Codec(ABC):
|
|
35
|
+
"""Codec abstract base class."""
|
|
36
|
+
|
|
37
|
+
# override in sub-class
|
|
38
|
+
codec_id: str | None = None
|
|
39
|
+
"""Codec identifier."""
|
|
40
|
+
|
|
41
|
+
@abstractmethod
|
|
42
|
+
def encode(self, buf): # pragma: no cover
|
|
43
|
+
"""Encode data in `buf`.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
buf : buffer-like
|
|
48
|
+
Data to be encoded. May be any object supporting the new-style
|
|
49
|
+
buffer protocol.
|
|
50
|
+
|
|
51
|
+
Returns
|
|
52
|
+
-------
|
|
53
|
+
enc : buffer-like
|
|
54
|
+
Encoded data. May be any object supporting the new-style buffer
|
|
55
|
+
protocol.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
@abstractmethod
|
|
59
|
+
def decode(self, buf, out=None): # pragma: no cover
|
|
60
|
+
"""Decode data in `buf`.
|
|
61
|
+
|
|
62
|
+
Parameters
|
|
63
|
+
----------
|
|
64
|
+
buf : buffer-like
|
|
65
|
+
Encoded data. May be any object supporting the new-style buffer
|
|
66
|
+
protocol.
|
|
67
|
+
out : buffer-like, optional
|
|
68
|
+
Writeable buffer to store decoded data. N.B. if provided, this buffer must
|
|
69
|
+
be exactly the right size to store the decoded data.
|
|
70
|
+
|
|
71
|
+
Returns
|
|
72
|
+
-------
|
|
73
|
+
dec : buffer-like
|
|
74
|
+
Decoded data. May be any object supporting the new-style
|
|
75
|
+
buffer protocol.
|
|
76
|
+
"""
|
|
77
|
+
|
|
78
|
+
def get_config(self):
|
|
79
|
+
"""Return a dictionary holding configuration parameters for this
|
|
80
|
+
codec. Must include an 'id' field with the codec identifier. All
|
|
81
|
+
values must be compatible with JSON encoding."""
|
|
82
|
+
|
|
83
|
+
# override in sub-class if need special encoding of config values
|
|
84
|
+
|
|
85
|
+
# setup config object
|
|
86
|
+
config = {'id': self.codec_id}
|
|
87
|
+
|
|
88
|
+
# by default, assume all non-private members are configuration
|
|
89
|
+
# parameters - override this in sub-class if not the case
|
|
90
|
+
for k in self.__dict__:
|
|
91
|
+
if not k.startswith('_'):
|
|
92
|
+
config[k] = getattr(self, k)
|
|
93
|
+
|
|
94
|
+
return config
|
|
95
|
+
|
|
96
|
+
@classmethod
|
|
97
|
+
def from_config(cls, config):
|
|
98
|
+
"""Instantiate codec from a configuration object."""
|
|
99
|
+
# N.B., assume at this point the 'id' field has been removed from
|
|
100
|
+
# the config object
|
|
101
|
+
|
|
102
|
+
# override in sub-class if need special decoding of config values
|
|
103
|
+
|
|
104
|
+
# by default, assume constructor accepts configuration parameters as
|
|
105
|
+
# keyword arguments without any special decoding
|
|
106
|
+
return cls(**config)
|
|
107
|
+
|
|
108
|
+
def __eq__(self, other):
|
|
109
|
+
# override in sub-class if need special equality comparison
|
|
110
|
+
try:
|
|
111
|
+
return self.get_config() == other.get_config()
|
|
112
|
+
except AttributeError:
|
|
113
|
+
return False
|
|
114
|
+
|
|
115
|
+
def __repr__(self):
|
|
116
|
+
# override in sub-class if need special representation
|
|
117
|
+
|
|
118
|
+
# by default, assume all non-private members are configuration
|
|
119
|
+
# parameters and valid keyword arguments to constructor function
|
|
120
|
+
|
|
121
|
+
r = f'{type(self).__name__}('
|
|
122
|
+
params = [
|
|
123
|
+
f'{k}={getattr(self, k)!r}' for k in sorted(self.__dict__) if not k.startswith('_')
|
|
124
|
+
]
|
|
125
|
+
r += ', '.join(params) + ')'
|
|
126
|
+
return r
|
numcodecs/astype.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AsType(Codec):
    """Filter to convert data between different types.

    Parameters
    ----------
    encode_dtype : dtype
        Data type to use for encoded data.
    decode_dtype : dtype
        Data type to use for decoded data. Required: the constructor has no
        default for this argument.

    Notes
    -----
    If `encode_dtype` is of lower precision than `decode_dtype`, please be
    aware that data loss can occur by writing data to disk using this filter.
    No checks are made to ensure the casting will work in that direction and
    data corruption will occur.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype=np.int8)
    >>> x
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)
    >>> f = numcodecs.AsType(encode_dtype=x.dtype, decode_dtype=np.int16)
    >>> y = f.decode(x)
    >>> y
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int16)
    >>> z = f.encode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8)

    """

    codec_id = 'astype'

    def __init__(self, encode_dtype, decode_dtype):
        # store both as numpy dtype objects so `.str` is available later
        self.encode_dtype = np.dtype(encode_dtype)
        self.decode_dtype = np.dtype(decode_dtype)

    def encode(self, buf):
        """Interpret `buf` as `decode_dtype` and return a copy cast to
        `encode_dtype`."""
        # normalise input
        arr = ensure_ndarray(buf).view(self.decode_dtype)

        # convert and copy
        return arr.astype(self.encode_dtype)

    def decode(self, buf, out=None):
        """Interpret `buf` as `encode_dtype`, cast it to `decode_dtype` and
        pass the result, together with `out`, to ``ndarray_copy``."""
        # normalise input
        enc = ensure_ndarray(buf).view(self.encode_dtype)

        # convert and copy
        dec = enc.astype(self.decode_dtype)

        # handle output
        return ndarray_copy(dec, out)

    def get_config(self):
        """Return the codec configuration with both dtypes given as their
        string form (``dtype.str``)."""
        return {
            'id': self.codec_id,
            'encode_dtype': self.encode_dtype.str,
            'decode_dtype': self.decode_dtype.str,
        }

    def __repr__(self):
        return f'{type(self).__name__}(encode_dtype={self.encode_dtype.str!r}, decode_dtype={self.decode_dtype.str!r})'
|
numcodecs/base64.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import base64 as _base64
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_contiguous_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Base64(Codec):
    """Codec providing base64 encoding via the Python standard library."""

    codec_id = "base64"

    def encode(self, buf):
        """Return the standard base64 encoding of the data in `buf`."""
        # normalise the input, then hand it straight to the standard library
        return _base64.standard_b64encode(ensure_contiguous_ndarray(buf))

    def decode(self, buf, out=None):
        """Decode base64 data in `buf`; the result and `out` are handed to
        ``ndarray_copy``."""
        encoded = ensure_contiguous_ndarray(buf)
        # only normalise the destination when the caller supplied one
        dest = None if out is None else ensure_contiguous_ndarray(out)
        raw = _base64.standard_b64decode(encoded)
        return ndarray_copy(raw, dest)
|
numcodecs/bitround.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray_like, ndarray_copy
|
|
5
|
+
|
|
6
|
+
# The size in bits of the mantissa/significand for the various floating types
|
|
7
|
+
# You cannot keep more bits of data than you have available
|
|
8
|
+
# https://en.wikipedia.org/wiki/IEEE_754
|
|
9
|
+
max_bits = {
|
|
10
|
+
"float16": 10,
|
|
11
|
+
"float32": 23,
|
|
12
|
+
"float64": 52,
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BitRound(Codec):
    """Floating-point bit rounding codec

    Drops a specified number of bits from the floating point mantissa,
    leaving an array more amenable to compression. The number of bits to keep should
    be determined by an information analysis of the data to be compressed.
    The approach is based on the paper by Klöwer et al. 2021
    (https://www.nature.com/articles/s43588-021-00156-2). See
    https://github.com/zarr-developers/numcodecs/issues/298 for discussion
    and the original implementation in Julia referred to at
    https://github.com/milankl/BitInformation.jl

    Parameters
    ----------

    keepbits: int
        The number of bits of the mantissa to keep. The range allowed
        depends on the dtype input data. If keepbits is
        equal to the maximum allowed for the data type, this is equivalent
        to no transform.
    """

    codec_id = 'bitround'

    def __init__(self, keepbits: int):
        if keepbits < 0:
            raise ValueError("keepbits must be zero or positive")
        self.keepbits = keepbits

    def encode(self, buf):
        """Create int array by rounding floating-point data

        The itemsize will be preserved, but the output should be much more
        compressible.

        Raises
        ------
        TypeError
            If the input is not a float array of at most 64 bits.
        ValueError
            If `keepbits` exceeds the mantissa size of the input dtype.
        """
        a = ensure_ndarray_like(buf)
        if a.dtype.kind != "f" or a.dtype.itemsize > 8:
            raise TypeError("Only float arrays (16-64bit) can be bit-rounded")
        # ``dtype.name`` is byte-order independent ("float32" for both "<f4"
        # and ">f4"); ``str(dtype)`` is not, and would miss the table for
        # non-native byte order input.
        bits = max_bits[a.dtype.name]
        if self.keepbits == bits:
            # nothing to drop: return the input unchanged
            return a
        if self.keepbits > bits:
            raise ValueError("Keepbits too large for given dtype")
        # cast float to int type of same width (preserve endianness)
        a_int_dtype = np.dtype(a.dtype.str.replace("f", "i"))
        all_set = np.array(-1, dtype=a_int_dtype)
        # work on a copy so the caller's data is not modified
        b = a.copy()
        b = b.view(a_int_dtype)
        maskbits = bits - self.keepbits
        # ones everywhere except the `maskbits` lowest bits
        mask = (all_set >> maskbits) << maskbits
        # add just under half a quantum plus the lowest kept bit, so that
        # exact ties round towards an even kept mantissa
        half_quantum1 = (1 << (maskbits - 1)) - 1
        b += ((b >> maskbits) & 1) + half_quantum1
        # clear the dropped bits
        b &= mask
        return b

    def decode(self, buf, out=None):
        """Remake floats from ints

        As with ``encode``, preserves itemsize.
        """
        buf = ensure_ndarray_like(buf)
        # Cast back from `int` to `float` type (noop if a `float`ing type buffer is provided)
        dt = np.dtype(buf.dtype.str.replace("i", "f"))
        data = buf.view(dt)
        return ndarray_copy(data, out)
|
|
Binary file
|
numcodecs/bz2.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import bz2 as _bz2
|
|
2
|
+
|
|
3
|
+
from numcodecs.abc import Codec
|
|
4
|
+
from numcodecs.compat import ensure_contiguous_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BZ2(Codec):
    """Codec providing compression using bzip2 via the Python standard library.

    Parameters
    ----------
    level : int
        Compression level.

    """

    codec_id = 'bz2'

    def __init__(self, level=1):
        self.level = level

    def encode(self, buf):
        """Compress the data in `buf` with bzip2 at the configured level."""
        contiguous = ensure_contiguous_ndarray(buf)
        return _bz2.compress(contiguous, self.level)

    # noinspection PyMethodMayBeStatic
    def decode(self, buf, out=None):
        """Decompress bzip2 data in `buf`; the result and `out` are handed to
        ``ndarray_copy``."""
        src = ensure_contiguous_ndarray(buf)
        dest = None if out is None else ensure_contiguous_ndarray(out)

        # N.B., bz2 cannot handle ndarray directly because of truth testing
        # issues, so it is given a memoryview of the input instead
        payload = _bz2.decompress(memoryview(src))

        # the standard library bz2 module does not support direct
        # decompression into a buffer, so the result is copied into the
        # destination if one was given
        return ndarray_copy(payload, dest)
|
numcodecs/categorize.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ensure_text, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Categorize(Codec):
    """Filter encoding categorical string data as integers.

    Parameters
    ----------
    labels : sequence of strings
        Category labels.
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.array(['male', 'female', 'female', 'male', 'unexpected'], dtype=object)
    >>> x
    array(['male', 'female', 'female', 'male', 'unexpected'],
          dtype=object)
    >>> codec = numcodecs.Categorize(labels=['female', 'male'], dtype=object)
    >>> y = codec.encode(x)
    >>> y
    array([2, 1, 1, 2, 0], dtype=uint8)
    >>> z = codec.decode(y)
    >>> z
    array(['male', 'female', 'female', 'male', ''],
          dtype=object)

    """

    codec_id = 'categorize'

    def __init__(self, labels, dtype, astype='u1'):
        self.dtype = np.dtype(dtype)
        # only string-like decoded types make sense for categorical labels
        if self.dtype.kind not in 'UO':
            raise TypeError("only unicode ('U') and object ('O') dtypes are supported")
        self.labels = [ensure_text(item) for item in labels]
        self.astype = np.dtype(astype)
        if self.astype == np.dtype(object):
            raise TypeError('encoding as object array not supported')

    def encode(self, buf):
        """Map each value in `buf` to the 1-based position of its label;
        values that match no label are encoded as 0. The result is 1-D."""
        # object data cannot be reinterpreted with a view, so it is taken as
        # an object array; anything else is viewed as the decoded dtype
        if self.dtype == np.dtype(object):
            values = np.asarray(buf, dtype=object)
        else:
            values = ensure_ndarray(buf).view(self.dtype)

        # work on a flat array to keep the implementation simple
        flat = values.reshape(-1, order='A')

        # start from all zeros: 0 is reserved for values not in `labels`
        codes = np.zeros_like(flat, dtype=self.astype)
        for code, label in enumerate(self.labels, start=1):
            codes[flat == label] = code

        return codes

    def decode(self, buf, out=None):
        """Map integer codes in `buf` back to labels; codes with no matching
        label decode to the empty string. The result and `out` are handed to
        ``ndarray_copy``."""
        # flat view of the encoded integers
        codes = ensure_ndarray(buf).view(self.astype).reshape(-1, order='A')

        # start from empty strings, then fill in every known label
        values = np.full_like(codes, fill_value='', dtype=self.dtype)
        for code, label in enumerate(self.labels, start=1):
            values[codes == code] = label

        return ndarray_copy(values, out)

    def get_config(self):
        """Return the codec configuration, with dtypes in their string form."""
        config = {'id': self.codec_id}
        config['labels'] = self.labels
        config['dtype'] = self.dtype.str
        config['astype'] = self.astype.str
        return config

    def __repr__(self):
        # show at most three labels so the representation stays short
        shown = repr(self.labels[:3])
        if len(self.labels) > 3:
            shown = shown[:-1] + ', ...]'
        cls_name = type(self).__name__
        return f'{cls_name}(dtype={self.dtype.str!r}, astype={self.astype.str!r}, labels={shown})'
|