numcodecs 0.16.0__cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of numcodecs might be problematic.

Files changed (79)
  1. numcodecs/__init__.py +146 -0
  2. numcodecs/_shuffle.cpython-311-aarch64-linux-gnu.so +0 -0
  3. numcodecs/abc.py +127 -0
  4. numcodecs/astype.py +72 -0
  5. numcodecs/base64.py +26 -0
  6. numcodecs/bitround.py +80 -0
  7. numcodecs/blosc.cpython-311-aarch64-linux-gnu.so +0 -0
  8. numcodecs/bz2.py +45 -0
  9. numcodecs/categorize.py +98 -0
  10. numcodecs/checksum32.py +183 -0
  11. numcodecs/compat.py +206 -0
  12. numcodecs/compat_ext.cpython-311-aarch64-linux-gnu.so +0 -0
  13. numcodecs/delta.py +94 -0
  14. numcodecs/errors.py +26 -0
  15. numcodecs/fixedscaleoffset.py +130 -0
  16. numcodecs/fletcher32.cpython-311-aarch64-linux-gnu.so +0 -0
  17. numcodecs/gzip.py +50 -0
  18. numcodecs/jenkins.cpython-311-aarch64-linux-gnu.so +0 -0
  19. numcodecs/json.py +107 -0
  20. numcodecs/lz4.cpython-311-aarch64-linux-gnu.so +0 -0
  21. numcodecs/lzma.py +72 -0
  22. numcodecs/msgpacks.py +86 -0
  23. numcodecs/ndarray_like.py +65 -0
  24. numcodecs/packbits.py +82 -0
  25. numcodecs/pcodec.py +118 -0
  26. numcodecs/pickles.py +55 -0
  27. numcodecs/quantize.py +98 -0
  28. numcodecs/registry.py +74 -0
  29. numcodecs/shuffle.py +61 -0
  30. numcodecs/tests/__init__.py +3 -0
  31. numcodecs/tests/common.py +285 -0
  32. numcodecs/tests/package_with_entrypoint/__init__.py +11 -0
  33. numcodecs/tests/package_with_entrypoint-0.1.dist-info/entry_points.txt +2 -0
  34. numcodecs/tests/test_astype.py +74 -0
  35. numcodecs/tests/test_base64.py +81 -0
  36. numcodecs/tests/test_bitround.py +81 -0
  37. numcodecs/tests/test_blosc.py +284 -0
  38. numcodecs/tests/test_bz2.py +66 -0
  39. numcodecs/tests/test_categorize.py +87 -0
  40. numcodecs/tests/test_checksum32.py +154 -0
  41. numcodecs/tests/test_compat.py +111 -0
  42. numcodecs/tests/test_delta.py +61 -0
  43. numcodecs/tests/test_entrypoints.py +24 -0
  44. numcodecs/tests/test_entrypoints_backport.py +36 -0
  45. numcodecs/tests/test_fixedscaleoffset.py +77 -0
  46. numcodecs/tests/test_fletcher32.py +56 -0
  47. numcodecs/tests/test_gzip.py +110 -0
  48. numcodecs/tests/test_jenkins.py +150 -0
  49. numcodecs/tests/test_json.py +85 -0
  50. numcodecs/tests/test_lz4.py +83 -0
  51. numcodecs/tests/test_lzma.py +94 -0
  52. numcodecs/tests/test_msgpacks.py +126 -0
  53. numcodecs/tests/test_ndarray_like.py +48 -0
  54. numcodecs/tests/test_packbits.py +39 -0
  55. numcodecs/tests/test_pcodec.py +90 -0
  56. numcodecs/tests/test_pickles.py +61 -0
  57. numcodecs/tests/test_quantize.py +76 -0
  58. numcodecs/tests/test_registry.py +43 -0
  59. numcodecs/tests/test_shuffle.py +166 -0
  60. numcodecs/tests/test_vlen_array.py +97 -0
  61. numcodecs/tests/test_vlen_bytes.py +97 -0
  62. numcodecs/tests/test_vlen_utf8.py +91 -0
  63. numcodecs/tests/test_zarr3.py +279 -0
  64. numcodecs/tests/test_zarr3_import.py +13 -0
  65. numcodecs/tests/test_zfpy.py +104 -0
  66. numcodecs/tests/test_zlib.py +94 -0
  67. numcodecs/tests/test_zstd.py +92 -0
  68. numcodecs/version.py +21 -0
  69. numcodecs/vlen.cpython-311-aarch64-linux-gnu.so +0 -0
  70. numcodecs/zarr3.py +401 -0
  71. numcodecs/zfpy.py +113 -0
  72. numcodecs/zlib.py +42 -0
  73. numcodecs/zstd.cpython-311-aarch64-linux-gnu.so +0 -0
  74. numcodecs-0.16.0.dist-info/METADATA +66 -0
  75. numcodecs-0.16.0.dist-info/RECORD +79 -0
  76. numcodecs-0.16.0.dist-info/WHEEL +6 -0
  77. numcodecs-0.16.0.dist-info/entry_points.txt +22 -0
  78. numcodecs-0.16.0.dist-info/licenses/LICENSE.txt +21 -0
  79. numcodecs-0.16.0.dist-info/top_level.txt +1 -0
numcodecs/checksum32.py ADDED
@@ -0,0 +1,183 @@
import abc
import struct
import zlib
from contextlib import suppress
from types import ModuleType
from typing import Literal, Optional

import numpy as np
from typing_extensions import Buffer

from .abc import Codec
from .compat import ensure_contiguous_ndarray, ndarray_copy
from .jenkins import jenkins_lookup3

_crc32c: Optional[ModuleType] = None
with suppress(ImportError):
    import crc32c as _crc32c  # type: ignore[no-redef, unused-ignore]

CHECKSUM_LOCATION = Literal['start', 'end']


class Checksum32(Codec, abc.ABC):
    # override in sub-class
    location: CHECKSUM_LOCATION = 'start'

    def __init__(self, location: CHECKSUM_LOCATION | None = None):
        if location is not None:
            self.location = location
        if self.location not in ['start', 'end']:
            raise ValueError(f"Invalid checksum location: {self.location}")

    def encode(self, buf):
        arr = ensure_contiguous_ndarray(buf).view('u1')
        checksum = self.checksum(arr) & 0xFFFFFFFF
        enc = np.empty(arr.nbytes + 4, dtype='u1')
        if self.location == 'start':
            checksum_view = enc[:4]
            payload_view = enc[4:]
        else:
            checksum_view = enc[-4:]
            payload_view = enc[:-4]
        checksum_view.view('<u4')[0] = checksum
        ndarray_copy(arr, payload_view)
        return enc

    def decode(self, buf, out=None):
        if len(buf) < 4:
            raise ValueError("Input buffer is too short to contain a 32-bit checksum.")
        if out is not None:
            ensure_contiguous_ndarray(out)  # check that out is a valid ndarray

        arr = ensure_contiguous_ndarray(buf).view('u1')
        if self.location == 'start':
            checksum_view = arr[:4]
            payload_view = arr[4:]
        else:
            checksum_view = arr[-4:]
            payload_view = arr[:-4]
        expect = checksum_view.view('<u4')[0]
        checksum = self.checksum(payload_view) & 0xFFFFFFFF
        if expect != checksum:
            raise RuntimeError(
                f"Stored and computed {self.codec_id} checksum do not match. Stored: {expect}. Computed: {checksum}."
            )
        return ndarray_copy(payload_view, out)

    @staticmethod
    @abc.abstractmethod
    def checksum(data: Buffer, value: int) -> int: ...


class CRC32(Checksum32):
    """Codec add a crc32 checksum to the buffer.

    Parameters
    ----------
    location : 'start' or 'end'
        Where to place the checksum in the buffer.
    """

    codec_id = 'crc32'
    location = 'start'

    @staticmethod
    def checksum(data: Buffer, value: int = 0) -> int:
        """
        Thin wrapper around ``zlib.crc32``.
        """
        return zlib.crc32(data, value)


class Adler32(Checksum32):
    """Codec add a adler32 checksum to the buffer.

    Parameters
    ----------
    location : 'start' or 'end'
        Where to place the checksum in the buffer.
    """

    codec_id = 'adler32'
    location = 'start'

    @staticmethod
    def checksum(data: Buffer, value: int = 1) -> int:
        """
        Thin wrapper around ``zlib.adler32``.
        """
        return zlib.adler32(data, value)


class JenkinsLookup3(Checksum32):
    """Bob Jenkin's lookup3 checksum with 32-bit output

    This is the HDF5 implementation.
    https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.

    Parameters
    ----------
    initval : int
        initial seed passed to the hash algorithm, default: 0
    prefix : int
        bytes prepended to the buffer before evaluating the hash, default: None
    """

    checksum = jenkins_lookup3
    codec_id = "jenkins_lookup3"

    def __init__(self, initval: int = 0, prefix=None):
        self.initval = initval
        if prefix is None:
            self.prefix = None
        else:
            self.prefix = np.frombuffer(prefix, dtype='uint8')

    def encode(self, buf):
        """Return buffer plus 4-byte Bob Jenkin's lookup3 checksum"""
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        if self.prefix is None:
            val = jenkins_lookup3(buf, self.initval)
        else:
            val = jenkins_lookup3(np.hstack((self.prefix, buf)), self.initval)
        return buf.tobytes() + struct.pack("<I", val)

    def decode(self, buf, out=None):
        """Check Bob Jenkin's lookup3 checksum, and return buffer without it"""
        b = ensure_contiguous_ndarray(buf).view('uint8')
        if self.prefix is None:
            val = jenkins_lookup3(b[:-4], self.initval)
        else:
            val = jenkins_lookup3(np.hstack((self.prefix, b[:-4])), self.initval)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The Bob Jenkin's lookup3 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        if out is not None:
            out.view("uint8")[:] = b[:-4]
            return out
        return memoryview(b[:-4])


if _crc32c:

    class CRC32C(Checksum32):
        """Codec add a crc32c checksum to the buffer.

        Parameters
        ----------
        location : 'start' or 'end'
            Where to place the checksum in the buffer.
        """

        codec_id = 'crc32c'
        checksum = _crc32c.crc32c  # type: ignore[union-attr]
        location = 'end'
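
A minimal round-trip sketch for the checksum codecs above (an illustration, not part of the package diff), assuming CRC32 is imported from numcodecs.checksum32:

import numpy as np
from numcodecs.checksum32 import CRC32

data = np.arange(10, dtype='u4')
codec = CRC32(location='start')   # checksum occupies the first 4 bytes of the encoded buffer
enc = codec.encode(data)
assert len(enc) == data.nbytes + 4
dec = codec.decode(enc)           # recomputes the checksum; raises RuntimeError on mismatch
assert np.array_equal(dec.view('u4'), data)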
numcodecs/compat.py ADDED
@@ -0,0 +1,206 @@
import array
import codecs

import numpy as np

from .ndarray_like import NDArrayLike, is_ndarray_like


def ensure_ndarray_like(buf) -> NDArrayLike:
    """Convenience function to coerce `buf` to ndarray-like array.

    Parameters
    ----------
    buf : ndarray-like, array-like, or bytes-like
        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
        any object exporting a buffer interface.

    Returns
    -------
    arr : NDArrayLike
        A ndarray-like, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """

    if not is_ndarray_like(buf):
        if isinstance(buf, array.array) and buf.typecode in "cu":
            # Guard condition, do not support array.array with unicode type, this is
            # problematic because numpy does not support it on all platforms. Also do not
            # support char as it was removed in Python 3.
            raise TypeError("array.array with char or unicode type is not supported")
        else:
            # N.B., first take a memoryview to make sure that we subsequently create a
            # numpy array from a memory buffer with no copy
            mem = memoryview(buf)
            # instantiate array from memoryview, ensures no copy
            buf = np.array(mem, copy=False)
    return buf


def ensure_ndarray(buf) -> np.ndarray:
    """Convenience function to coerce `buf` to a numpy array, if it is not already a
    numpy array.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """
    return np.array(ensure_ndarray_like(buf), copy=False)


def ensure_contiguous_ndarray_like(buf, max_buffer_size=None, flatten=True) -> NDArrayLike:
    """Convenience function to coerce `buf` to ndarray-like array.
    Also ensures that the returned value exports fully contiguous memory,
    and supports the new-style buffer interface. If the optional max_buffer_size is
    provided, raise a ValueError if the number of bytes consumed by the returned
    array exceeds this value.

    Parameters
    ----------
    buf : ndarray-like, array-like, or bytes-like
        A numpy array like object such as numpy.ndarray, cupy.ndarray, or
        any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the array are flatten.

    Returns
    -------
    arr : NDArrayLike
        A ndarray-like, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """
    arr = ensure_ndarray_like(buf)

    # check for object arrays, these are just memory pointers, actual memory holding
    # item data is scattered elsewhere
    if arr.dtype == object:
        raise TypeError("object arrays are not supported")

    # check for datetime or timedelta ndarray, the buffer interface doesn't support those
    if arr.dtype.kind in "Mm":
        arr = arr.view(np.int64)  # type: ignore[arg-type]

    # check memory is contiguous, if so flatten
    if arr.flags.c_contiguous or arr.flags.f_contiguous:
        if flatten:
            # can flatten without copy
            arr = arr.reshape(-1, order="A")
    else:
        raise ValueError("an array with contiguous memory is required")

    if max_buffer_size is not None and arr.nbytes > max_buffer_size:
        msg = f"Codec does not support buffers of > {max_buffer_size} bytes"
        raise ValueError(msg)

    return arr


def ensure_contiguous_ndarray(buf, max_buffer_size=None, flatten=True) -> np.ndarray:
    """Convenience function to coerce `buf` to a numpy array, if it is not already a
    numpy array. Also ensures that the returned value exports fully contiguous memory,
    and supports the new-style buffer interface. If the optional max_buffer_size is
    provided, raise a ValueError if the number of bytes consumed by the returned
    array exceeds this value.

    Parameters
    ----------
    buf : array-like or bytes-like
        A numpy array or any object exporting a buffer interface.
    max_buffer_size : int
        If specified, the largest allowable value of arr.nbytes, where arr
        is the returned array.
    flatten : bool
        If True, the array are flatten.

    Returns
    -------
    arr : ndarray
        A numpy array, sharing memory with `buf`.

    Notes
    -----
    This function will not create a copy under any circumstances, it is guaranteed to
    return a view on memory exported by `buf`.
    """

    return ensure_ndarray(
        ensure_contiguous_ndarray_like(buf, max_buffer_size=max_buffer_size, flatten=flatten)
    )


def ensure_bytes(buf) -> bytes:
    """Obtain a bytes object from memory exposed by `buf`."""

    if not isinstance(buf, bytes):
        arr = ensure_ndarray_like(buf)

        # check for object arrays, these are just memory pointers,
        # actual memory holding item data is scattered elsewhere
        if arr.dtype == object:
            raise TypeError("object arrays are not supported")

        # create bytes
        buf = arr.tobytes(order="A")

    return buf


def ensure_text(s, encoding="utf-8"):
    if not isinstance(s, str):
        s = ensure_contiguous_ndarray(s)
        s = codecs.decode(s, encoding)
    return s


def ndarray_copy(src, dst) -> NDArrayLike:
    """Copy the contents of the array from `src` to `dst`."""

    if dst is None:
        # no-op
        return src

    # ensure ndarray like
    src = ensure_ndarray_like(src)
    dst = ensure_ndarray_like(dst)

    # flatten source array
    src = src.reshape(-1, order="A")

    # ensure same data type
    if dst.dtype != object:
        src = src.view(dst.dtype)

    # reshape source to match destination
    if src.shape != dst.shape:
        if dst.flags.f_contiguous:
            order = "F"
        else:
            order = "C"
        src = src.reshape(dst.shape, order=order)

    # copy via numpy
    np.copyto(dst, src)

    return dst
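
A short sketch of the no-copy contract these helpers provide (illustrative only, not part of the diff):

import numpy as np
from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray, ndarray_copy

buf = bytearray(b'\x01\x02\x03\x04')
arr = ensure_contiguous_ndarray(buf)   # flat uint8 view sharing memory with buf
arr[0] = 0xFF
assert buf[0] == 0xFF                  # no copy was made

assert ensure_bytes(np.arange(3, dtype='u1')) == b'\x00\x01\x02'

dst = np.empty(4, dtype='u1')
ndarray_copy(arr, dst)                 # copies arr's contents into dst
assert bytes(dst) == bytes(buf)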
numcodecs/delta.py ADDED
@@ -0,0 +1,94 @@
import numpy as np

from .abc import Codec
from .compat import ensure_ndarray, ndarray_copy


class Delta(Codec):
    """Codec to encode data as the difference between adjacent values.

    Parameters
    ----------
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Notes
    -----
    If `astype` is an integer data type, please ensure that it is
    sufficiently large to store encoded values. No checks are made and data
    may become corrupted due to integer overflow if `astype` is too small.
    Note also that the encoded data for each chunk includes the absolute
    value of the first element in the chunk, and so the encoded data type in
    general needs to be large enough to store absolute values from the array.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.arange(100, 120, 2, dtype='i2')
    >>> codec = numcodecs.Delta(dtype='i2', astype='i1')
    >>> y = codec.encode(x)
    >>> y
    array([100, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int8)
    >>> z = codec.decode(y)
    >>> z
    array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int16)

    """

    codec_id = 'delta'

    def __init__(self, dtype, astype=None):
        self.dtype = np.dtype(dtype)
        if astype is None:
            self.astype = self.dtype
        else:
            self.astype = np.dtype(astype)
        if self.dtype == np.dtype(object) or self.astype == np.dtype(object):
            raise ValueError('object arrays are not supported')

    def encode(self, buf):
        # normalise input
        arr = ensure_ndarray(buf).view(self.dtype)

        # flatten to simplify implementation
        arr = arr.reshape(-1, order='A')

        # setup encoded output
        enc = np.empty_like(arr, dtype=self.astype)

        # set first element
        enc[0] = arr[0]

        # compute differences
        enc[1:] = np.diff(arr)
        return enc

    def decode(self, buf, out=None):
        # normalise input
        enc = ensure_ndarray(buf).view(self.astype)

        # flatten to simplify implementation
        enc = enc.reshape(-1, order='A')

        # setup decoded output
        dec = np.empty_like(enc, dtype=self.dtype)

        # decode differences
        np.cumsum(enc, out=dec)

        # handle output
        return ndarray_copy(dec, out)

    def get_config(self):
        # override to handle encoding dtypes
        return {'id': self.codec_id, 'dtype': self.dtype.str, 'astype': self.astype.str}

    def __repr__(self):
        r = f'{type(self).__name__}(dtype={self.dtype.str!r}'
        if self.astype != self.dtype:
            r += f', astype={self.astype.str!r}'
        r += ')'
        return r
numcodecs/errors.py ADDED
@@ -0,0 +1,26 @@
"""
This module defines custom exceptions that are raised in the `numcodecs` codebase.
"""


class UnknownCodecError(ValueError):
    """
    An exception that is raised when trying to receive a codec that has not been registered.

    Parameters
    ----------
    codec_id : str
        Codec identifier.

    Examples
    ----------
    >>> import numcodecs
    >>> numcodecs.get_codec({"codec_id": "unknown"})
    Traceback (most recent call last):
        ...
    UnknownCodecError: codec not available: 'unknown'
    """

    def __init__(self, codec_id: str):
        self.codec_id = codec_id
        super().__init__(f"codec not available: '{codec_id}'")
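
For illustration, a sketch of handling this error around a registry lookup (assuming a codec config keyed by 'id', as the get_config methods elsewhere in this package produce; the codec id here is hypothetical):

import numcodecs
from numcodecs.errors import UnknownCodecError

try:
    numcodecs.get_codec({"id": "does-not-exist"})   # unregistered codec id
except UnknownCodecError as err:
    print(err)   # the registry raises UnknownCodecError for unregistered ids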
numcodecs/fixedscaleoffset.py ADDED
@@ -0,0 +1,130 @@
import numpy as np

from .abc import Codec
from .compat import ensure_ndarray, ndarray_copy


class FixedScaleOffset(Codec):
    """Simplified version of the scale-offset filter available in HDF5.
    Applies the transformation `(x - offset) * scale` to all chunks. Results
    are rounded to the nearest integer but are not packed according to the
    minimum number of bits.

    Parameters
    ----------
    offset : float
        Value to subtract from data.
    scale : float
        Value to multiply by data.
    dtype : dtype
        Data type to use for decoded data.
    astype : dtype, optional
        Data type to use for encoded data.

    Notes
    -----
    If `astype` is an integer data type, please ensure that it is
    sufficiently large to store encoded values. No checks are made and data
    may become corrupted due to integer overflow if `astype` is too small.

    Examples
    --------
    >>> import numcodecs
    >>> import numpy as np
    >>> x = np.linspace(1000, 1001, 10, dtype='f8')
    >>> x
    array([1000. , 1000.11111111, 1000.22222222, 1000.33333333,
           1000.44444444, 1000.55555556, 1000.66666667, 1000.77777778,
           1000.88888889, 1001. ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10, dtype='f8', astype='u1')
    >>> y1 = codec.encode(x)
    >>> y1
    array([ 0, 1, 2, 3, 4, 6, 7, 8, 9, 10], dtype=uint8)
    >>> z1 = codec.decode(y1)
    >>> z1
    array([1000. , 1000.1, 1000.2, 1000.3, 1000.4, 1000.6, 1000.7,
           1000.8, 1000.9, 1001. ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10**2, dtype='f8', astype='u1')
    >>> y2 = codec.encode(x)
    >>> y2
    array([ 0, 11, 22, 33, 44, 56, 67, 78, 89, 100], dtype=uint8)
    >>> z2 = codec.decode(y2)
    >>> z2
    array([1000. , 1000.11, 1000.22, 1000.33, 1000.44, 1000.56,
           1000.67, 1000.78, 1000.89, 1001. ])
    >>> codec = numcodecs.FixedScaleOffset(offset=1000, scale=10**3, dtype='f8', astype='u2')
    >>> y3 = codec.encode(x)
    >>> y3
    array([ 0, 111, 222, 333, 444, 556, 667, 778, 889, 1000], dtype=uint16)
    >>> z3 = codec.decode(y3)
    >>> z3
    array([1000. , 1000.111, 1000.222, 1000.333, 1000.444, 1000.556,
           1000.667, 1000.778, 1000.889, 1001. ])

    See Also
    --------
    numcodecs.quantize.Quantize

    """

    codec_id = 'fixedscaleoffset'

    def __init__(self, offset, scale, dtype, astype=None):
        self.offset = offset
        self.scale = scale
        self.dtype = np.dtype(dtype)
        if astype is None:
            self.astype = self.dtype
        else:
            self.astype = np.dtype(astype)
        if self.dtype == np.dtype(object) or self.astype == np.dtype(object):
            raise ValueError('object arrays are not supported')

    def encode(self, buf):
        # normalise input
        arr = ensure_ndarray(buf).view(self.dtype)

        # flatten to simplify implementation
        arr = arr.reshape(-1, order='A')

        # compute scale offset
        enc = (arr - self.offset) * self.scale

        # round to nearest integer
        enc = np.around(enc)

        # convert dtype
        return enc.astype(self.astype, copy=False)

    def decode(self, buf, out=None):
        # interpret buffer as numpy array
        enc = ensure_ndarray(buf).view(self.astype)

        # flatten to simplify implementation
        enc = enc.reshape(-1, order='A')

        # decode scale offset
        dec = (enc / self.scale) + self.offset

        # convert dtype
        dec = dec.astype(self.dtype, copy=False)

        # handle output
        return ndarray_copy(dec, out)

    def get_config(self):
        # override to handle encoding dtypes
        return {
            'id': self.codec_id,
            'scale': self.scale,
            'offset': self.offset,
            'dtype': self.dtype.str,
            'astype': self.astype.str,
        }

    def __repr__(self):
        r = f'{type(self).__name__}(scale={self.scale}, offset={self.offset}, dtype={self.dtype.str!r}'
        if self.astype != self.dtype:
            r += f', astype={self.astype.str!r}'
        r += ')'
        return r
numcodecs/gzip.py ADDED
@@ -0,0 +1,50 @@
import gzip as _gzip
import io

from .abc import Codec
from .compat import ensure_bytes, ensure_contiguous_ndarray


class GZip(Codec):
    """Codec providing gzip compression using zlib via the Python standard library.

    Parameters
    ----------
    level : int
        Compression level.

    """

    codec_id = 'gzip'

    def __init__(self, level=1):
        self.level = level

    def encode(self, buf):
        # normalise inputs
        buf = ensure_contiguous_ndarray(buf)

        # do compression
        compressed = io.BytesIO()
        with _gzip.GzipFile(fileobj=compressed, mode='wb', compresslevel=self.level) as compressor:
            compressor.write(buf)
        return compressed.getvalue()

    # noinspection PyMethodMayBeStatic
    def decode(self, buf, out=None):
        # normalise inputs
        # BytesIO only copies if the data is not of `bytes` type.
        # This allows `bytes` objects to pass through without copying.
        buf = io.BytesIO(ensure_bytes(buf))

        # do decompression
        with _gzip.GzipFile(fileobj=buf, mode='rb') as decompressor:
            if out is not None:
                out_view = ensure_contiguous_ndarray(out)
                decompressor.readinto(out_view)
                if decompressor.read(1) != b'':
                    raise ValueError("Unable to fit data into `out`")
            else:
                out = decompressor.read()

        return out
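
A brief round-trip sketch for the GZip codec above (illustrative, not from the diff):

import numpy as np
from numcodecs.gzip import GZip

codec = GZip(level=5)
data = np.linspace(0, 1, 1000)
compressed = codec.encode(data)                       # gzip-compressed bytes
restored = np.frombuffer(codec.decode(compressed), dtype=data.dtype)
assert np.array_equal(restored, data)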