numcodecs 0.13.1__cp312-cp312-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of numcodecs has been flagged for review; see the registry's advisory page for details.
- numcodecs/__init__.py +143 -0
- numcodecs/_shuffle.cpython-312-darwin.so +0 -0
- numcodecs/abc.py +126 -0
- numcodecs/astype.py +76 -0
- numcodecs/base64.py +27 -0
- numcodecs/bitround.py +79 -0
- numcodecs/blosc.cpython-312-darwin.so +0 -0
- numcodecs/bz2.py +45 -0
- numcodecs/categorize.py +101 -0
- numcodecs/checksum32.py +94 -0
- numcodecs/compat.py +208 -0
- numcodecs/compat_ext.cpython-312-darwin.so +0 -0
- numcodecs/delta.py +97 -0
- numcodecs/fixedscaleoffset.py +132 -0
- numcodecs/fletcher32.cpython-312-darwin.so +0 -0
- numcodecs/gzip.py +52 -0
- numcodecs/jenkins.cpython-312-darwin.so +0 -0
- numcodecs/json.py +107 -0
- numcodecs/lz4.cpython-312-darwin.so +0 -0
- numcodecs/lzma.py +69 -0
- numcodecs/msgpacks.py +86 -0
- numcodecs/ndarray_like.py +65 -0
- numcodecs/packbits.py +85 -0
- numcodecs/pcodec.py +89 -0
- numcodecs/pickles.py +55 -0
- numcodecs/quantize.py +100 -0
- numcodecs/registry.py +72 -0
- numcodecs/shuffle.py +61 -0
- numcodecs/tests/__init__.py +3 -0
- numcodecs/tests/common.py +354 -0
- numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
- numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
- numcodecs/tests/test_astype.py +74 -0
- numcodecs/tests/test_base64.py +81 -0
- numcodecs/tests/test_bitround.py +81 -0
- numcodecs/tests/test_blosc.py +277 -0
- numcodecs/tests/test_bz2.py +66 -0
- numcodecs/tests/test_categorize.py +87 -0
- numcodecs/tests/test_checksum32.py +58 -0
- numcodecs/tests/test_compat.py +108 -0
- numcodecs/tests/test_delta.py +60 -0
- numcodecs/tests/test_entrypoints.py +24 -0
- numcodecs/tests/test_entrypoints_backport.py +35 -0
- numcodecs/tests/test_fixedscaleoffset.py +69 -0
- numcodecs/tests/test_fletcher32.py +56 -0
- numcodecs/tests/test_gzip.py +110 -0
- numcodecs/tests/test_jenkins.py +150 -0
- numcodecs/tests/test_json.py +85 -0
- numcodecs/tests/test_lz4.py +83 -0
- numcodecs/tests/test_lzma.py +90 -0
- numcodecs/tests/test_msgpacks.py +123 -0
- numcodecs/tests/test_ndarray_like.py +48 -0
- numcodecs/tests/test_packbits.py +39 -0
- numcodecs/tests/test_pcodec.py +80 -0
- numcodecs/tests/test_pickles.py +61 -0
- numcodecs/tests/test_quantize.py +76 -0
- numcodecs/tests/test_registry.py +40 -0
- numcodecs/tests/test_shuffle.py +168 -0
- numcodecs/tests/test_vlen_array.py +97 -0
- numcodecs/tests/test_vlen_bytes.py +93 -0
- numcodecs/tests/test_vlen_utf8.py +91 -0
- numcodecs/tests/test_zfpy.py +98 -0
- numcodecs/tests/test_zlib.py +94 -0
- numcodecs/tests/test_zstd.py +92 -0
- numcodecs/version.py +16 -0
- numcodecs/vlen.cpython-312-darwin.so +0 -0
- numcodecs/zfpy.py +111 -0
- numcodecs/zlib.py +42 -0
- numcodecs/zstd.cpython-312-darwin.so +0 -0
- numcodecs-0.13.1.dist-info/LICENSE.txt +21 -0
- numcodecs-0.13.1.dist-info/METADATA +64 -0
- numcodecs-0.13.1.dist-info/RECORD +74 -0
- numcodecs-0.13.1.dist-info/WHEEL +5 -0
- numcodecs-0.13.1.dist-info/top_level.txt +1 -0
numcodecs/compat.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# ruff: noqa: F401
|
|
2
|
+
import array
|
|
3
|
+
import codecs
|
|
4
|
+
import functools
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
from .ndarray_like import NDArrayLike, is_ndarray_like
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def ensure_ndarray_like(buf) -> NDArrayLike:
    """Convenience function to coerce `buf` to ndarray-like array.

    Parameters
    ----------
    buf : ndarray-like, array-like, or bytes-like
        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
        any object exporting a buffer interface.

    Returns
    -------
    arr : NDArrayLike
        A ndarray-like, sharing memory with `buf`.

    Raises
    ------
    TypeError
        If `buf` is an ``array.array`` with a char or unicode typecode.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """
    if not is_ndarray_like(buf):
        if isinstance(buf, array.array) and buf.typecode in "cu":
            # Guard condition, do not support array.array with unicode type, this is
            # problematic because numpy does not support it on all platforms. Also do
            # not support char as it was removed in Python 3.
            raise TypeError("array.array with char or unicode type is not supported")
        else:
            # N.B., first take a memoryview to make sure that we subsequently create a
            # numpy array from a memory buffer with no copy
            mem = memoryview(buf)
            # np.asarray never copies when the buffer interface is available. Avoid
            # np.array(mem, copy=False): the meaning of copy=False changed in
            # NumPy 2.0 (it now raises when a copy cannot be avoided), so asarray is
            # the portable spelling of "view, don't copy".
            buf = np.asarray(mem)
    return buf
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def ensure_ndarray(buf) -> np.ndarray:
    """Convenience function to coerce `buf` to a numpy array, if it is not already a
    numpy array.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """
    # np.asarray is a no-op for ndarrays and a zero-copy view for buffer exporters;
    # avoid np.array(..., copy=False) whose semantics changed in NumPy 2.0.
    return np.asarray(ensure_ndarray_like(buf))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def ensure_contiguous_ndarray_like(buf, max_buffer_size=None, flatten=True) -> NDArrayLike:
    """Convenience function to coerce `buf` to ndarray-like array.
    Also ensures that the returned value exports fully contiguous memory,
    and supports the new-style buffer interface. If the optional max_buffer_size is
    provided, raise a ValueError if the number of bytes consumed by the returned
    array exceeds this value.

    Parameters
    ----------
    buf : ndarray-like, array-like, or bytes-like
        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
        any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the returned array is flattened to one dimension.

    Returns
    -------
    arr : NDArrayLike
        A ndarray-like, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """
    arr = ensure_ndarray_like(buf)

    # object arrays only store pointers; the item data lives elsewhere, so their
    # memory cannot be exported through the buffer interface
    if arr.dtype == object:
        raise TypeError("object arrays are not supported")

    # the buffer interface cannot describe datetime/timedelta dtypes; reinterpret
    # the underlying 64-bit storage instead
    if arr.dtype.kind in "Mm":
        arr = arr.view(np.int64)

    # contiguity is mandatory; flattening (when requested) is free for contiguous
    # memory regardless of C or Fortran order
    if not (arr.flags.c_contiguous or arr.flags.f_contiguous):
        raise ValueError("an array with contiguous memory is required")
    if flatten:
        arr = arr.reshape(-1, order="A")

    if max_buffer_size is not None and arr.nbytes > max_buffer_size:
        raise ValueError(f"Codec does not support buffers of > {max_buffer_size} bytes")

    return arr
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.ndarray:
    """Convenience function to coerce `buf` to a numpy array, if it is not already a
    numpy array. Also ensures that the returned value exports fully contiguous memory,
    and supports the new-style buffer interface. If the optional max_buffer_size is
    provided, raise a ValueError if the number of bytes consumed by the returned
    array exceeds this value.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the returned array is flattened to one dimension.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """
    # NOTE: return annotation corrected from `np.array` (a function) to `np.ndarray`
    # (the actual type).
    return ensure_ndarray(
        ensure_contiguous_ndarray_like(buf, max_buffer_size=max_buffer_size, flatten=flatten)
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def ensure_bytes(buf) -> bytes:
    """Obtain a bytes object from memory exposed by `buf`."""
    # fast path: already bytes, nothing to do
    if isinstance(buf, bytes):
        return buf

    arr = ensure_ndarray_like(buf)

    # object arrays only hold memory pointers; the actual item data is scattered
    # elsewhere and cannot be serialised this way
    if arr.dtype == object:
        raise TypeError("object arrays are not supported")

    # materialise the bytes, honouring either C or Fortran memory order
    return arr.tobytes(order="A")
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def ensure_text(s, encoding="utf-8"):
    """Return `s` as a str, decoding buffer-like input with `encoding`."""
    if isinstance(s, str):
        return s
    # coerce buffer exporters to a contiguous array, then decode its bytes
    contiguous = ensure_contiguous_ndarray(s)
    return codecs.decode(contiguous, encoding)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def ndarray_copy(src, dst) -> NDArrayLike:
    """Copy the contents of the array from `src` to `dst`."""
    # nothing to copy into — hand the source back unchanged
    if dst is None:
        return src

    # normalise both sides to ndarray-like views
    src = ensure_ndarray_like(src)
    dst = ensure_ndarray_like(dst)

    # flatten the source, preserving whichever memory order it has
    src = src.reshape(-1, order="A")

    # reinterpret the source bytes as the destination dtype (object arrays have no
    # uniform byte layout, so leave those untouched)
    if dst.dtype != object:
        src = src.view(dst.dtype)

    # match the destination's shape, using its memory layout for the reshape
    if src.shape != dst.shape:
        order = "F" if dst.flags.f_contiguous else "C"
        src = src.reshape(dst.shape, order=order)

    # delegate the element copy to numpy
    np.copyto(dst, src)

    return dst
|
|
Binary file
|
numcodecs/delta.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Delta(Codec):
    """Codec to encode data as the difference between adjacent values.

    Parameters
    ----------
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Notes
    -----
    If `astype` is an integer data type, please ensure that it is
    sufficiently large to store encoded values. No checks are made and data
    may become corrupted due to integer overflow if `astype` is too small.
    Note also that the encoded data for each chunk includes the absolute
    value of the first element in the chunk, and so the encoded data type in
    general needs to be large enough to store absolute values from the array.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype='i2')
    >>> codec = numcodecs.Delta(dtype='i2', astype='i1')
    >>> y = codec.encode(x)
    >>> y
    array([100, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int8)
    >>> z = codec.decode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int16)

    """

    codec_id = 'delta'

    def __init__(self, dtype, astype=None):
        self.dtype = np.dtype(dtype)
        if astype is None:
            self.astype = self.dtype
        else:
            self.astype = np.dtype(astype)
        if self.dtype == np.dtype(object) or self.astype == np.dtype(object):
            raise ValueError('object arrays are not supported')

    def encode(self, buf):
        """Encode `buf` as first element followed by adjacent differences."""
        # normalise input
        arr = ensure_ndarray(buf).view(self.dtype)

        # flatten to simplify implementation
        arr = arr.reshape(-1, order='A')

        # setup encoded output
        enc = np.empty_like(arr, dtype=self.astype)

        # guard against empty input: the original indexed enc[0] unconditionally,
        # raising IndexError on zero-length buffers
        if arr.size:
            # set first element to the absolute value, then store differences
            enc[0] = arr[0]
            enc[1:] = np.diff(arr)

        return enc

    def decode(self, buf, out=None):
        """Decode differences via cumulative sum, optionally into `out`."""
        # normalise input
        enc = ensure_ndarray(buf).view(self.astype)

        # flatten to simplify implementation
        enc = enc.reshape(-1, order='A')

        # setup decoded output
        dec = np.empty_like(enc, dtype=self.dtype)

        # decode differences
        np.cumsum(enc, out=dec)

        # handle output
        out = ndarray_copy(dec, out)

        return out

    def get_config(self):
        # override to handle encoding dtypes
        return dict(id=self.codec_id, dtype=self.dtype.str, astype=self.astype.str)

    def __repr__(self):
        r = f'{type(self).__name__}(dtype={self.dtype.str!r}'
        # only show astype when it differs from dtype
        if self.astype != self.dtype:
            r += f', astype={self.astype.str!r}'
        r += ')'
        return r
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from .abc import Codec
|
|
4
|
+
from .compat import ensure_ndarray, ndarray_copy
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class FixedScaleOffset(Codec):
    """Simplified version of the scale-offset filter available in HDF5.
    Applies the transformation `(x - offset) * scale` to all chunks. Results
    are rounded to the nearest integer but are not packed according to the
    minimum number of bits.

    Parameters
    ----------
    offset : float
        Value to subtract from data.
    scale : float
        Value to multiply by data.
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Notes
    -----
    If `astype` is an integer data type, please ensure that it is
    sufficiently large to store encoded values. No checks are made and data
    may become corrupted due to integer overflow if `astype` is too small.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.linspace(1000, 1001, 10, dtype='f8')
    >>> x
    array([1000.        , 1000.11111111, 1000.22222222, 1000.33333333,
           1000.44444444, 1000.55555556, 1000.66666667, 1000.77777778,
           1000.88888889, 1001.        ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype='f8', astype='u1')
    >>> y1 = codec.encode(x)
    >>> y1
    array([ 0,  1,  2,  3,  4,  6,  7,  8,  9, 10], dtype=uint8)
    >>> z1 = codec.decode(y1)
    >>> z1
    array([1000. , 1000.1, 1000.2, 1000.3, 1000.4, 1000.6, 1000.7,
           1000.8, 1000.9, 1001. ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10**2, dtype='f8', astype='u1')
    >>> y2 = codec.encode(x)
    >>> y2
    array([  0,  11,  22,  33,  44,  56,  67,  78,  89, 100], dtype=uint8)
    >>> z2 = codec.decode(y2)
    >>> z2
    array([1000.  , 1000.11, 1000.22, 1000.33, 1000.44, 1000.56,
           1000.67, 1000.78, 1000.89, 1001.  ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10**3, dtype='f8', astype='u2')
    >>> y3 = codec.encode(x)
    >>> y3
    array([   0,  111,  222,  333,  444,  556,  667,  778,  889, 1000], dtype=uint16)
    >>> z3 = codec.decode(y3)
    >>> z3
    array([1000.   , 1000.111, 1000.222, 1000.333, 1000.444, 1000.556,
           1000.667, 1000.778, 1000.889, 1001.   ])

    See Also
    --------
    numcodecs.quantize.Quantize

    """

    codec_id = 'fixedscaleoffset'

    def __init__(self, offset, scale, dtype, astype=None):
        self.offset = offset
        self.scale = scale
        self.dtype = np.dtype(dtype)
        self.astype = self.dtype if astype is None else np.dtype(astype)
        if self.dtype == np.dtype(object) or self.astype == np.dtype(object):
            raise ValueError('object arrays are not supported')

    def encode(self, buf):
        """Apply (x - offset) * scale, round, and cast to the encoded dtype."""
        # view input through the decoded dtype and flatten for simplicity
        values = ensure_ndarray(buf).view(self.dtype).reshape(-1, order='A')

        # shift, scale, and round to the nearest integer in one pipeline
        scaled = np.around((values - self.offset) * self.scale)

        # cast to the encoded dtype without an extra copy when possible
        return scaled.astype(self.astype, copy=False)

    def decode(self, buf, out=None):
        """Invert the scale/offset transform and cast back to the decoded dtype."""
        # view input through the encoded dtype and flatten for simplicity
        encoded = ensure_ndarray(buf).view(self.astype).reshape(-1, order='A')

        # invert the transform: divide by scale, then add the offset back
        restored = (encoded / self.scale) + self.offset

        # cast to the decoded dtype without an extra copy when possible
        restored = restored.astype(self.dtype, copy=False)

        # copy into `out` if provided, otherwise return the new array
        return ndarray_copy(restored, out)

    def get_config(self):
        # override to handle encoding dtypes
        return dict(
            id=self.codec_id,
            scale=self.scale,
            offset=self.offset,
            dtype=self.dtype.str,
            astype=self.astype.str,
        )

    def __repr__(self):
        r = f'{type(self).__name__}(scale={self.scale}, offset={self.offset}, dtype={self.dtype.str!r}'
        # only show astype when it differs from dtype
        if self.astype != self.dtype:
            r += f', astype={self.astype.str!r}'
        r += ')'
        return r
|
|
Binary file
|
numcodecs/gzip.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import gzip as _gzip
|
|
2
|
+
import io
|
|
3
|
+
|
|
4
|
+
from .abc import Codec
|
|
5
|
+
from .compat import ensure_bytes, ensure_contiguous_ndarray
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class GZip(Codec):
    """Codec providing gzip compression using zlib via the Python standard library.

    Parameters
    ----------
    level : int
        Compression level.

    """

    codec_id = 'gzip'

    def __init__(self, level=1):
        self.level = level

    def encode(self, buf):
        """Compress `buf` and return the gzip stream as bytes."""
        # normalise input to a contiguous memory export
        buf = ensure_contiguous_ndarray(buf)

        # stream the compressed output into an in-memory buffer
        stream = io.BytesIO()
        with _gzip.GzipFile(fileobj=stream, mode='wb', compresslevel=self.level) as gz:
            gz.write(buf)
        return stream.getvalue()

    # noinspection PyMethodMayBeStatic
    def decode(self, buf, out=None):
        """Decompress `buf`, optionally into a preallocated `out` buffer."""
        # BytesIO only copies when given non-bytes input, so `bytes` objects
        # pass through without copying
        stream = io.BytesIO(ensure_bytes(buf))

        with _gzip.GzipFile(fileobj=stream, mode='rb') as gz:
            if out is None:
                return gz.read()
            # decompress directly into the caller-supplied buffer
            out_view = ensure_contiguous_ndarray(out)
            gz.readinto(out_view)
            # any leftover data means `out` was too small
            if gz.read(1) != b'':
                raise ValueError("Unable to fit data into `out`")
        return out
|
|
Binary file
|
numcodecs/json.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import json as _json
|
|
2
|
+
import textwrap
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from .abc import Codec
|
|
7
|
+
from .compat import ensure_text
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class JSON(Codec):
    """Codec to encode data as JSON. Useful for encoding an array of Python objects.

    .. versionchanged:: 0.6
        The encoding format has been changed to include the array shape in the encoded
        data, which ensures that all object arrays can be correctly encoded and decoded.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.array(['foo', 'bar', 'baz'], dtype='object')
    >>> codec = numcodecs.JSON()
    >>> codec.decode(codec.encode(x))
    array(['foo', 'bar', 'baz'], dtype=object)

    See Also
    --------
    numcodecs.pickles.Pickle, numcodecs.msgpacks.MsgPack

    """

    codec_id = 'json2'

    def __init__(
        self,
        encoding='utf-8',
        skipkeys=False,
        ensure_ascii=True,
        check_circular=True,
        allow_nan=True,
        sort_keys=True,
        indent=None,
        separators=None,
        strict=True,
    ):
        self._text_encoding = encoding
        if separators is None:
            # pin separators explicitly for consistent behaviour across Python
            # versions; use the most compact representation when indent is None
            separators = (',', ':') if indent is None else (', ', ': ')
        separators = tuple(separators)
        self._encoder_config = dict(
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            indent=indent,
            separators=separators,
            sort_keys=sort_keys,
        )
        self._encoder = _json.JSONEncoder(**self._encoder_config)
        self._decoder_config = dict(strict=strict)
        self._decoder = _json.JSONDecoder(**self._decoder_config)

    def encode(self, buf):
        """Serialise `buf` as a JSON list of [items..., dtype, shape]."""
        try:
            buf = np.asarray(buf)
        except ValueError:
            # heterogeneous input: fall back to an object array
            buf = np.asarray(buf, dtype=object)
        # flatten items, then append dtype string and shape as trailing metadata
        items = np.atleast_1d(buf).tolist()
        items += [buf.dtype.str, buf.shape]
        return self._encoder.encode(items).encode(self._text_encoding)

    def decode(self, buf, out=None):
        """Rebuild the array from the JSON payload, optionally copying into `out`."""
        items = self._decoder.decode(ensure_text(buf, self._text_encoding))
        # trailing two items carry the dtype string and the shape
        dec = np.empty(items[-1], dtype=items[-2])
        if items[-1]:
            dec[:] = items[:-2]
        else:
            # zero-dimensional array: a single scalar item
            dec[...] = items[0]
        if out is None:
            return dec
        np.copyto(out, dec)
        return out

    def get_config(self):
        config = dict(id=self.codec_id, encoding=self._text_encoding)
        config.update(self._encoder_config)
        config.update(self._decoder_config)
        return config

    def __repr__(self):
        params = [f'encoding={self._text_encoding!r}']
        params += [f'{k}={v!r}' for k, v in sorted(self._encoder_config.items())]
        params += [f'{k}={v!r}' for k, v in sorted(self._decoder_config.items())]
        classname = type(self).__name__
        params = ', '.join(params)
        return textwrap.fill(
            f'{classname}({params})', width=80, break_long_words=False, subsequent_indent='	'
        )
|
|
Binary file
|
numcodecs/lzma.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Resolve an lzma implementation: prefer the stdlib module, fall back to the
# `backports.lzma` package; leave `_lzma` as None if neither is available so
# the codec below is simply not defined.
_lzma = None
try:
    import lzma as _lzma
except ImportError:  # pragma: no cover
    try:
        from backports import lzma as _lzma
    except ImportError:
        pass


if _lzma:
    from .abc import Codec
    from .compat import ensure_contiguous_ndarray, ndarray_copy

    # noinspection PyShadowingBuiltins
    class LZMA(Codec):
        """Codec providing compression using lzma via the Python standard
        library.

        Parameters
        ----------
        format : integer, optional
            One of the lzma format codes, e.g., ``lzma.FORMAT_XZ``.
        check : integer, optional
            One of the lzma check codes, e.g., ``lzma.CHECK_NONE``.
        preset : integer, optional
            An integer between 0 and 9 inclusive, specifying the compression
            level.
        filters : list, optional
            A list of dictionaries specifying compression filters. If
            filters are provided, 'preset' must be None.

        """

        codec_id = 'lzma'

        # format=1 corresponds to lzma.FORMAT_XZ; check=-1 lets lzma choose the
        # default integrity check for the format.
        def __init__(self, format=1, check=-1, preset=None, filters=None):
            self.format = format
            self.check = check
            self.preset = preset
            self.filters = filters

        def encode(self, buf):
            # normalise inputs
            buf = ensure_contiguous_ndarray(buf)

            # do compression
            return _lzma.compress(
                buf,
                format=self.format,
                check=self.check,
                preset=self.preset,
                filters=self.filters,
            )

        def decode(self, buf, out=None):
            # normalise inputs
            buf = ensure_contiguous_ndarray(buf)
            if out is not None:
                out = ensure_contiguous_ndarray(out)

            # do decompression (lzma.decompress takes no check/preset arguments;
            # the check is read from the compressed stream itself)
            dec = _lzma.decompress(buf, format=self.format, filters=self.filters)

            # handle destination: copy into `out` if provided, else return dec
            return ndarray_copy(dec, out)

        def __repr__(self):
            return f'{type(self).__name__}(format={self.format!r}, check={self.check!r}, preset={self.preset!r}, filters={self.filters!r})'
|