numcodecs 0.14.0__cp313-cp313-win_amd64.whl → 0.15.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of numcodecs might be problematic. Click here for more details.
- numcodecs/__init__.py +44 -43
- numcodecs/_shuffle.cp313-win_amd64.pyd +0 -0
- numcodecs/abc.py +1 -1
- numcodecs/astype.py +2 -6
- numcodecs/base64.py +1 -2
- numcodecs/blosc.cp313-win_amd64.pyd +0 -0
- numcodecs/categorize.py +7 -10
- numcodecs/checksum32.py +24 -24
- numcodecs/compat_ext.cp313-win_amd64.pyd +0 -0
- numcodecs/delta.py +3 -10
- numcodecs/fixedscaleoffset.py +8 -10
- numcodecs/fletcher32.cp313-win_amd64.pyd +0 -0
- numcodecs/gzip.py +1 -3
- numcodecs/jenkins.cp313-win_amd64.pyd +0 -0
- numcodecs/json.py +11 -11
- numcodecs/lz4.cp313-win_amd64.pyd +0 -0
- numcodecs/lzma.py +1 -1
- numcodecs/msgpacks.py +6 -6
- numcodecs/ndarray_like.py +2 -2
- numcodecs/pcodec.py +61 -32
- numcodecs/pickles.py +1 -1
- numcodecs/quantize.py +7 -9
- numcodecs/registry.py +1 -1
- numcodecs/tests/common.py +3 -4
- numcodecs/tests/test_blosc.py +9 -11
- numcodecs/tests/test_checksum32.py +25 -9
- numcodecs/tests/test_lzma.py +1 -1
- numcodecs/tests/test_pcodec.py +18 -8
- numcodecs/tests/test_registry.py +2 -2
- numcodecs/tests/test_shuffle.py +2 -4
- numcodecs/tests/test_vlen_bytes.py +3 -0
- numcodecs/tests/test_zarr3.py +70 -44
- numcodecs/version.py +2 -2
- numcodecs/vlen.cp313-win_amd64.pyd +0 -0
- numcodecs/zarr3.py +44 -22
- numcodecs/zfpy.py +1 -1
- numcodecs/zstd.cp313-win_amd64.pyd +0 -0
- {numcodecs-0.14.0.dist-info → numcodecs-0.15.0.dist-info}/METADATA +20 -21
- numcodecs-0.15.0.dist-info/RECORD +78 -0
- {numcodecs-0.14.0.dist-info → numcodecs-0.15.0.dist-info}/WHEEL +1 -1
- numcodecs-0.14.0.dist-info/RECORD +0 -78
- {numcodecs-0.14.0.dist-info → numcodecs-0.15.0.dist-info}/LICENSE.txt +0 -0
- {numcodecs-0.14.0.dist-info → numcodecs-0.15.0.dist-info}/entry_points.txt +0 -0
- {numcodecs-0.14.0.dist-info → numcodecs-0.15.0.dist-info}/top_level.txt +0 -0
numcodecs/__init__.py
CHANGED
|
@@ -36,41 +36,32 @@ from numcodecs.bz2 import BZ2
|
|
|
36
36
|
|
|
37
37
|
register_codec(BZ2)
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
from numcodecs.lzma import LZMA
|
|
39
|
+
from numcodecs.lzma import LZMA
|
|
41
40
|
|
|
42
|
-
|
|
41
|
+
register_codec(LZMA)
|
|
43
42
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
from numcodecs.blosc import Blosc
|
|
47
|
-
|
|
48
|
-
register_codec(Blosc)
|
|
49
|
-
# initialize blosc
|
|
50
|
-
try:
|
|
51
|
-
ncores = multiprocessing.cpu_count()
|
|
52
|
-
except OSError: # pragma: no cover
|
|
53
|
-
ncores = 1
|
|
54
|
-
blosc.init()
|
|
55
|
-
blosc.set_nthreads(min(8, ncores))
|
|
56
|
-
atexit.register(blosc.destroy)
|
|
43
|
+
from numcodecs import blosc
|
|
44
|
+
from numcodecs.blosc import Blosc
|
|
57
45
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
46
|
+
register_codec(Blosc)
|
|
47
|
+
# initialize blosc
|
|
48
|
+
try:
|
|
49
|
+
ncores = multiprocessing.cpu_count()
|
|
50
|
+
except OSError: # pragma: no cover
|
|
51
|
+
ncores = 1
|
|
52
|
+
blosc._init()
|
|
53
|
+
blosc.set_nthreads(min(8, ncores))
|
|
54
|
+
atexit.register(blosc.destroy)
|
|
61
55
|
|
|
62
|
-
|
|
56
|
+
from numcodecs import zstd as zstd
|
|
57
|
+
from numcodecs.zstd import Zstd
|
|
63
58
|
|
|
64
|
-
|
|
65
|
-
from numcodecs import lz4 as lz4
|
|
66
|
-
from numcodecs.lz4 import LZ4
|
|
59
|
+
register_codec(Zstd)
|
|
67
60
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
with suppress(ImportError):
|
|
71
|
-
from numcodecs.zfpy import ZFPY
|
|
61
|
+
from numcodecs import lz4 as lz4
|
|
62
|
+
from numcodecs.lz4 import LZ4
|
|
72
63
|
|
|
73
|
-
|
|
64
|
+
register_codec(LZ4)
|
|
74
65
|
|
|
75
66
|
from numcodecs.astype import AsType
|
|
76
67
|
|
|
@@ -112,15 +103,9 @@ from numcodecs.bitround import BitRound
|
|
|
112
103
|
|
|
113
104
|
register_codec(BitRound)
|
|
114
105
|
|
|
115
|
-
|
|
116
|
-
from numcodecs.msgpacks import MsgPack
|
|
117
|
-
|
|
118
|
-
register_codec(MsgPack)
|
|
119
|
-
|
|
120
|
-
from numcodecs.checksum32 import CRC32, CRC32C, Adler32, JenkinsLookup3
|
|
106
|
+
from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3
|
|
121
107
|
|
|
122
108
|
register_codec(CRC32)
|
|
123
|
-
register_codec(CRC32C)
|
|
124
109
|
register_codec(Adler32)
|
|
125
110
|
register_codec(JenkinsLookup3)
|
|
126
111
|
|
|
@@ -128,18 +113,34 @@ from numcodecs.json import JSON
|
|
|
128
113
|
|
|
129
114
|
register_codec(JSON)
|
|
130
115
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
from numcodecs.vlen import VLenArray, VLenBytes, VLenUTF8
|
|
116
|
+
from numcodecs import vlen as vlen
|
|
117
|
+
from numcodecs.vlen import VLenArray, VLenBytes, VLenUTF8
|
|
134
118
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
119
|
+
register_codec(VLenUTF8)
|
|
120
|
+
register_codec(VLenBytes)
|
|
121
|
+
register_codec(VLenArray)
|
|
138
122
|
|
|
139
123
|
from numcodecs.fletcher32 import Fletcher32
|
|
140
124
|
|
|
141
125
|
register_codec(Fletcher32)
|
|
142
126
|
|
|
143
|
-
|
|
127
|
+
# Optional dependencies
|
|
128
|
+
with suppress(ImportError):
|
|
129
|
+
from numcodecs.zfpy import ZFPY
|
|
130
|
+
|
|
131
|
+
register_codec(ZFPY)
|
|
132
|
+
|
|
133
|
+
with suppress(ImportError):
|
|
134
|
+
from numcodecs.msgpacks import MsgPack
|
|
135
|
+
|
|
136
|
+
register_codec(MsgPack)
|
|
137
|
+
|
|
138
|
+
with suppress(ImportError):
|
|
139
|
+
from numcodecs.checksum32 import CRC32C
|
|
140
|
+
|
|
141
|
+
register_codec(CRC32C)
|
|
142
|
+
|
|
143
|
+
with suppress(ImportError):
|
|
144
|
+
from numcodecs.pcodec import PCodec
|
|
144
145
|
|
|
145
|
-
register_codec(PCodec)
|
|
146
|
+
register_codec(PCodec)
|
|
Binary file
|
numcodecs/abc.py
CHANGED
|
@@ -84,7 +84,7 @@ class Codec(ABC):
|
|
|
84
84
|
# override in sub-class if need special encoding of config values
|
|
85
85
|
|
|
86
86
|
# setup config object
|
|
87
|
-
config =
|
|
87
|
+
config = {'id': self.codec_id}
|
|
88
88
|
|
|
89
89
|
# by default, assume all non-private members are configuration
|
|
90
90
|
# parameters - override this in sub-class if not the case
|
numcodecs/astype.py
CHANGED
|
@@ -49,9 +49,7 @@ class AsType(Codec):
|
|
|
49
49
|
arr = ensure_ndarray(buf).view(self.decode_dtype)
|
|
50
50
|
|
|
51
51
|
# convert and copy
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
return enc
|
|
52
|
+
return arr.astype(self.encode_dtype)
|
|
55
53
|
|
|
56
54
|
def decode(self, buf, out=None):
|
|
57
55
|
# normalise input
|
|
@@ -61,9 +59,7 @@ class AsType(Codec):
|
|
|
61
59
|
dec = enc.astype(self.decode_dtype)
|
|
62
60
|
|
|
63
61
|
# handle output
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
return out
|
|
62
|
+
return ndarray_copy(dec, out)
|
|
67
63
|
|
|
68
64
|
def get_config(self):
|
|
69
65
|
return {
|
numcodecs/base64.py
CHANGED
|
@@ -13,8 +13,7 @@ class Base64(Codec):
|
|
|
13
13
|
# normalise inputs
|
|
14
14
|
buf = ensure_contiguous_ndarray(buf)
|
|
15
15
|
# do compression
|
|
16
|
-
|
|
17
|
-
return compressed
|
|
16
|
+
return _base64.standard_b64encode(buf)
|
|
18
17
|
|
|
19
18
|
def decode(self, buf, out=None):
|
|
20
19
|
# normalise inputs
|
|
Binary file
|
numcodecs/categorize.py
CHANGED
|
@@ -80,18 +80,15 @@ class Categorize(Codec):
|
|
|
80
80
|
dec[enc == (i + 1)] = label
|
|
81
81
|
|
|
82
82
|
# handle output
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
return dec
|
|
83
|
+
return ndarray_copy(dec, out)
|
|
86
84
|
|
|
87
85
|
def get_config(self):
|
|
88
|
-
|
|
89
|
-
id
|
|
90
|
-
labels
|
|
91
|
-
dtype
|
|
92
|
-
astype
|
|
93
|
-
|
|
94
|
-
return config
|
|
86
|
+
return {
|
|
87
|
+
'id': self.codec_id,
|
|
88
|
+
'labels': self.labels,
|
|
89
|
+
'dtype': self.dtype.str,
|
|
90
|
+
'astype': self.astype.str,
|
|
91
|
+
}
|
|
95
92
|
|
|
96
93
|
def __repr__(self):
|
|
97
94
|
# make sure labels part is not too long
|
numcodecs/checksum32.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import struct
|
|
2
2
|
import zlib
|
|
3
3
|
from collections.abc import Callable
|
|
4
|
-
from
|
|
4
|
+
from contextlib import suppress
|
|
5
|
+
from types import ModuleType
|
|
6
|
+
from typing import TYPE_CHECKING, Literal, Optional
|
|
5
7
|
|
|
6
8
|
import numpy as np
|
|
7
9
|
|
|
@@ -9,7 +11,11 @@ from .abc import Codec
|
|
|
9
11
|
from .compat import ensure_contiguous_ndarray, ndarray_copy
|
|
10
12
|
from .jenkins import jenkins_lookup3
|
|
11
13
|
|
|
12
|
-
|
|
14
|
+
_crc32c: Optional[ModuleType] = None
|
|
15
|
+
with suppress(ImportError):
|
|
16
|
+
import crc32c as _crc32c # type: ignore[no-redef]
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
13
19
|
from typing_extensions import Buffer
|
|
14
20
|
|
|
15
21
|
CHECKSUM_LOCATION = Literal['start', 'end']
|
|
@@ -76,28 +82,6 @@ class CRC32(Checksum32):
|
|
|
76
82
|
location = 'start'
|
|
77
83
|
|
|
78
84
|
|
|
79
|
-
class CRC32C(Checksum32):
|
|
80
|
-
"""Codec add a crc32c checksum to the buffer.
|
|
81
|
-
|
|
82
|
-
Parameters
|
|
83
|
-
----------
|
|
84
|
-
location : 'start' or 'end'
|
|
85
|
-
Where to place the checksum in the buffer.
|
|
86
|
-
"""
|
|
87
|
-
|
|
88
|
-
codec_id = 'crc32c'
|
|
89
|
-
|
|
90
|
-
def checksum(self, buf):
|
|
91
|
-
try:
|
|
92
|
-
from crc32c import crc32c as crc32c_
|
|
93
|
-
|
|
94
|
-
return crc32c_(buf)
|
|
95
|
-
except ImportError: # pragma: no cover
|
|
96
|
-
raise ImportError("crc32c must be installed to use the CRC32C checksum codec.")
|
|
97
|
-
|
|
98
|
-
location = 'end'
|
|
99
|
-
|
|
100
|
-
|
|
101
85
|
class Adler32(Checksum32):
|
|
102
86
|
"""Codec that adds an adler32 checksum to the buffer.
|
|
103
87
|
|
|
@@ -168,3 +152,19 @@ class JenkinsLookup3(Checksum32):
|
|
|
168
152
|
out.view("uint8")[:] = b[:-4]
|
|
169
153
|
return out
|
|
170
154
|
return memoryview(b[:-4])
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
if _crc32c:
|
|
158
|
+
|
|
159
|
+
class CRC32C(Checksum32):
|
|
160
|
+
"""Codec that adds a crc32c checksum to the buffer.
|
|
161
|
+
|
|
162
|
+
Parameters
|
|
163
|
+
----------
|
|
164
|
+
location : 'start' or 'end'
|
|
165
|
+
Where to place the checksum in the buffer.
|
|
166
|
+
"""
|
|
167
|
+
|
|
168
|
+
codec_id = 'crc32c'
|
|
169
|
+
checksum = _crc32c.crc32c # type: ignore[union-attr]
|
|
170
|
+
location = 'end'
|
|
Binary file
|
numcodecs/delta.py
CHANGED
|
@@ -63,12 +63,7 @@ class Delta(Codec):
|
|
|
63
63
|
enc[0] = arr[0]
|
|
64
64
|
|
|
65
65
|
# compute differences
|
|
66
|
-
|
|
67
|
-
if arr.dtype == bool:
|
|
68
|
-
np.not_equal(arr[1:], arr[:-1], out=enc[1:])
|
|
69
|
-
else:
|
|
70
|
-
np.subtract(arr[1:], arr[:-1], out=enc[1:])
|
|
71
|
-
|
|
66
|
+
enc[1:] = np.diff(arr)
|
|
72
67
|
return enc
|
|
73
68
|
|
|
74
69
|
def decode(self, buf, out=None):
|
|
@@ -85,13 +80,11 @@ class Delta(Codec):
|
|
|
85
80
|
np.cumsum(enc, out=dec)
|
|
86
81
|
|
|
87
82
|
# handle output
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
return out
|
|
83
|
+
return ndarray_copy(dec, out)
|
|
91
84
|
|
|
92
85
|
def get_config(self):
|
|
93
86
|
# override to handle encoding dtypes
|
|
94
|
-
return
|
|
87
|
+
return {'id': self.codec_id, 'dtype': self.dtype.str, 'astype': self.astype.str}
|
|
95
88
|
|
|
96
89
|
def __repr__(self):
|
|
97
90
|
r = f'{type(self).__name__}(dtype={self.dtype.str!r}'
|
numcodecs/fixedscaleoffset.py
CHANGED
|
@@ -94,9 +94,7 @@ class FixedScaleOffset(Codec):
|
|
|
94
94
|
enc = np.around(enc)
|
|
95
95
|
|
|
96
96
|
# convert dtype
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
return enc
|
|
97
|
+
return enc.astype(self.astype, copy=False)
|
|
100
98
|
|
|
101
99
|
def decode(self, buf, out=None):
|
|
102
100
|
# interpret buffer as numpy array
|
|
@@ -116,13 +114,13 @@ class FixedScaleOffset(Codec):
|
|
|
116
114
|
|
|
117
115
|
def get_config(self):
|
|
118
116
|
# override to handle encoding dtypes
|
|
119
|
-
return
|
|
120
|
-
id
|
|
121
|
-
scale
|
|
122
|
-
offset
|
|
123
|
-
dtype
|
|
124
|
-
astype
|
|
125
|
-
|
|
117
|
+
return {
|
|
118
|
+
'id': self.codec_id,
|
|
119
|
+
'scale': self.scale,
|
|
120
|
+
'offset': self.offset,
|
|
121
|
+
'dtype': self.dtype.str,
|
|
122
|
+
'astype': self.astype.str,
|
|
123
|
+
}
|
|
126
124
|
|
|
127
125
|
def __repr__(self):
|
|
128
126
|
r = f'{type(self).__name__}(scale={self.scale}, offset={self.offset}, dtype={self.dtype.str!r}'
|
|
Binary file
|
numcodecs/gzip.py
CHANGED
|
@@ -28,9 +28,7 @@ class GZip(Codec):
|
|
|
28
28
|
compressed = io.BytesIO()
|
|
29
29
|
with _gzip.GzipFile(fileobj=compressed, mode='wb', compresslevel=self.level) as compressor:
|
|
30
30
|
compressor.write(buf)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
return compressed
|
|
31
|
+
return compressed.getvalue()
|
|
34
32
|
|
|
35
33
|
# noinspection PyMethodMayBeStatic
|
|
36
34
|
def decode(self, buf, out=None):
|
|
Binary file
|
numcodecs/json.py
CHANGED
|
@@ -52,17 +52,17 @@ class JSON(Codec):
|
|
|
52
52
|
else:
|
|
53
53
|
separators = ', ', ': '
|
|
54
54
|
separators = tuple(separators)
|
|
55
|
-
self._encoder_config =
|
|
56
|
-
skipkeys
|
|
57
|
-
ensure_ascii
|
|
58
|
-
check_circular
|
|
59
|
-
allow_nan
|
|
60
|
-
indent
|
|
61
|
-
separators
|
|
62
|
-
sort_keys
|
|
63
|
-
|
|
55
|
+
self._encoder_config = {
|
|
56
|
+
'skipkeys': skipkeys,
|
|
57
|
+
'ensure_ascii': ensure_ascii,
|
|
58
|
+
'check_circular': check_circular,
|
|
59
|
+
'allow_nan': allow_nan,
|
|
60
|
+
'indent': indent,
|
|
61
|
+
'separators': separators,
|
|
62
|
+
'sort_keys': sort_keys,
|
|
63
|
+
}
|
|
64
64
|
self._encoder = _json.JSONEncoder(**self._encoder_config)
|
|
65
|
-
self._decoder_config =
|
|
65
|
+
self._decoder_config = {'strict': strict}
|
|
66
66
|
self._decoder = _json.JSONDecoder(**self._decoder_config)
|
|
67
67
|
|
|
68
68
|
def encode(self, buf):
|
|
@@ -89,7 +89,7 @@ class JSON(Codec):
|
|
|
89
89
|
return dec
|
|
90
90
|
|
|
91
91
|
def get_config(self):
|
|
92
|
-
config =
|
|
92
|
+
config = {'id': self.codec_id, 'encoding': self._text_encoding}
|
|
93
93
|
config.update(self._encoder_config)
|
|
94
94
|
config.update(self._decoder_config)
|
|
95
95
|
return config
|
|
Binary file
|
numcodecs/lzma.py
CHANGED
numcodecs/msgpacks.py
CHANGED
|
@@ -75,12 +75,12 @@ class MsgPack(Codec):
|
|
|
75
75
|
return dec
|
|
76
76
|
|
|
77
77
|
def get_config(self):
|
|
78
|
-
return
|
|
79
|
-
id
|
|
80
|
-
raw
|
|
81
|
-
use_single_float
|
|
82
|
-
use_bin_type
|
|
83
|
-
|
|
78
|
+
return {
|
|
79
|
+
'id': self.codec_id,
|
|
80
|
+
'raw': self.raw,
|
|
81
|
+
'use_single_float': self.use_single_float,
|
|
82
|
+
'use_bin_type': self.use_bin_type,
|
|
83
|
+
}
|
|
84
84
|
|
|
85
85
|
def __repr__(self):
|
|
86
86
|
return f'MsgPack(raw={self.raw!r}, use_bin_type={self.use_bin_type!r}, use_single_float={self.use_single_float!r})'
|
numcodecs/ndarray_like.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, ClassVar,
|
|
1
|
+
from typing import Any, ClassVar, Protocol, runtime_checkable
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class _CachedProtocolMeta(Protocol.__class__): # type: ignore[name-defined]
|
|
@@ -53,7 +53,7 @@ class NDArrayLike(Protocol, metaclass=_CachedProtocolMeta):
|
|
|
53
53
|
|
|
54
54
|
def __setitem__(self, key, value): ... # pragma: no cover
|
|
55
55
|
|
|
56
|
-
def tobytes(self, order:
|
|
56
|
+
def tobytes(self, order: str | None = ...) -> bytes: ... # pragma: no cover
|
|
57
57
|
|
|
58
58
|
def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": ... # pragma: no cover
|
|
59
59
|
|
numcodecs/pcodec.py
CHANGED
|
@@ -1,19 +1,13 @@
|
|
|
1
|
-
from typing import Literal
|
|
1
|
+
from typing import Literal
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
import numcodecs.abc
|
|
3
|
+
from numcodecs.abc import Codec
|
|
5
4
|
from numcodecs.compat import ensure_contiguous_ndarray
|
|
6
|
-
|
|
7
|
-
try:
|
|
8
|
-
from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
|
|
9
|
-
except ImportError: # pragma: no cover
|
|
10
|
-
standalone = None
|
|
11
|
-
|
|
5
|
+
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
|
|
12
6
|
|
|
13
7
|
DEFAULT_MAX_PAGE_N = 262144
|
|
14
8
|
|
|
15
9
|
|
|
16
|
-
class PCodec(
|
|
10
|
+
class PCodec(Codec):
|
|
17
11
|
"""
|
|
18
12
|
PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
|
|
19
13
|
numerical sequences with high compression ratio and fast speed.
|
|
@@ -28,14 +22,21 @@ class PCodec(numcodecs.abc.Codec):
|
|
|
28
22
|
level : int
|
|
29
23
|
A compression level from 0-12, where 12 takes the longest and compresses
|
|
30
24
|
the most.
|
|
31
|
-
|
|
32
|
-
Either a delta encoding level from 0-7 or None. If set to None, pcodec
|
|
33
|
-
will try to infer the optimal delta encoding order.
|
|
34
|
-
mode_spec : {'auto', 'classic'}
|
|
25
|
+
mode_spec : {"auto", "classic"}
|
|
35
26
|
Configures whether Pcodec should try to infer the best "mode" or
|
|
36
27
|
structure of the data (e.g. approximate multiples of 0.1) to improve
|
|
37
28
|
compression ratio, or skip this step and just use the numbers as-is
|
|
38
|
-
(Classic mode).
|
|
29
|
+
(Classic mode). Note that the "try*" specs are not currently supported.
|
|
30
|
+
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
|
|
31
|
+
Configures the delta encoding strategy. By default, uses "auto" which
|
|
32
|
+
will try to infer the best encoding order.
|
|
33
|
+
paging_spec : {"equal_pages_up_to"}
|
|
34
|
+
Configures the paging strategy. Only "equal_pages_up_to" is currently
|
|
35
|
+
supported.
|
|
36
|
+
delta_encoding_order : int or None
|
|
37
|
+
Explicit delta encoding level from 0-7. Only valid if delta_spec is
|
|
38
|
+
"try_consecutive" or "auto" (to support backwards compatibility with
|
|
39
|
+
older versions of this codec).
|
|
39
40
|
equal_pages_up_to : int
|
|
40
41
|
Divide the chunk into equal pages of up to this many numbers.
|
|
41
42
|
"""
|
|
@@ -45,39 +46,67 @@ class PCodec(numcodecs.abc.Codec):
|
|
|
45
46
|
def __init__(
|
|
46
47
|
self,
|
|
47
48
|
level: int = 8,
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
49
|
+
*,
|
|
50
|
+
mode_spec: Literal["auto", "classic"] = "auto",
|
|
51
|
+
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
|
|
52
|
+
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
|
|
53
|
+
delta_encoding_order: int | None = None,
|
|
54
|
+
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
|
|
52
55
|
):
|
|
53
|
-
if standalone is None: # pragma: no cover
|
|
54
|
-
raise ImportError("pcodec must be installed to use the PCodec codec.")
|
|
55
|
-
|
|
56
56
|
# note that we use `level` instead of `compression_level` to
|
|
57
57
|
# match other codecs
|
|
58
58
|
self.level = level
|
|
59
|
+
self.mode_spec = mode_spec
|
|
60
|
+
self.delta_spec = delta_spec
|
|
61
|
+
self.paging_spec = paging_spec
|
|
59
62
|
self.delta_encoding_order = delta_encoding_order
|
|
60
63
|
self.equal_pages_up_to = equal_pages_up_to
|
|
61
|
-
self.mode_spec = mode_spec
|
|
62
|
-
|
|
63
|
-
def encode(self, buf):
|
|
64
|
-
buf = ensure_contiguous_ndarray(buf)
|
|
65
64
|
|
|
65
|
+
def _get_chunk_config(self):
|
|
66
66
|
match self.mode_spec:
|
|
67
|
-
case
|
|
67
|
+
case "auto":
|
|
68
68
|
mode_spec = ModeSpec.auto()
|
|
69
|
-
case
|
|
69
|
+
case "classic":
|
|
70
70
|
mode_spec = ModeSpec.classic()
|
|
71
71
|
case _:
|
|
72
|
-
raise ValueError(f"
|
|
73
|
-
|
|
72
|
+
raise ValueError(f"mode_spec {self.mode_spec} is not supported")
|
|
73
|
+
|
|
74
|
+
if self.delta_encoding_order is not None and self.delta_spec == "auto":
|
|
75
|
+
# backwards compat for before delta_spec was introduced
|
|
76
|
+
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
|
|
77
|
+
elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
|
|
78
|
+
raise ValueError(
|
|
79
|
+
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
|
|
80
|
+
)
|
|
81
|
+
else:
|
|
82
|
+
match self.delta_spec:
|
|
83
|
+
case "auto":
|
|
84
|
+
delta_spec = DeltaSpec.auto()
|
|
85
|
+
case "none":
|
|
86
|
+
delta_spec = DeltaSpec.none()
|
|
87
|
+
case "try_consecutive":
|
|
88
|
+
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
|
|
89
|
+
case "try_lookback":
|
|
90
|
+
delta_spec = DeltaSpec.try_lookback()
|
|
91
|
+
case _:
|
|
92
|
+
raise ValueError(f"delta_spec {self.delta_spec} is not supported")
|
|
93
|
+
|
|
94
|
+
match self.paging_spec:
|
|
95
|
+
case "equal_pages_up_to":
|
|
96
|
+
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
|
|
97
|
+
case _:
|
|
98
|
+
raise ValueError(f"paging_spec {self.paging_spec} is not supported")
|
|
74
99
|
|
|
75
|
-
|
|
100
|
+
return ChunkConfig(
|
|
76
101
|
compression_level=self.level,
|
|
77
|
-
|
|
102
|
+
delta_spec=delta_spec,
|
|
78
103
|
mode_spec=mode_spec,
|
|
79
104
|
paging_spec=paging_spec,
|
|
80
105
|
)
|
|
106
|
+
|
|
107
|
+
def encode(self, buf):
|
|
108
|
+
buf = ensure_contiguous_ndarray(buf)
|
|
109
|
+
config = self._get_chunk_config()
|
|
81
110
|
return standalone.simple_compress(buf, config)
|
|
82
111
|
|
|
83
112
|
def decode(self, buf, out=None):
|
numcodecs/pickles.py
CHANGED
numcodecs/quantize.py
CHANGED
|
@@ -73,9 +73,7 @@ class Quantize(Codec):
|
|
|
73
73
|
enc = np.around(scale * arr) / scale
|
|
74
74
|
|
|
75
75
|
# cast dtype
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
return enc
|
|
76
|
+
return enc.astype(self.astype, copy=False)
|
|
79
77
|
|
|
80
78
|
def decode(self, buf, out=None):
|
|
81
79
|
# filter is lossy, decoding is no-op
|
|
@@ -85,12 +83,12 @@ class Quantize(Codec):
|
|
|
85
83
|
|
|
86
84
|
def get_config(self):
|
|
87
85
|
# override to handle encoding dtypes
|
|
88
|
-
return
|
|
89
|
-
id
|
|
90
|
-
digits
|
|
91
|
-
dtype
|
|
92
|
-
astype
|
|
93
|
-
|
|
86
|
+
return {
|
|
87
|
+
'id': self.codec_id,
|
|
88
|
+
'digits': self.digits,
|
|
89
|
+
'dtype': self.dtype.str,
|
|
90
|
+
'astype': self.astype.str,
|
|
91
|
+
}
|
|
94
92
|
|
|
95
93
|
def __repr__(self):
|
|
96
94
|
r = f'{type(self).__name__}(digits={self.digits}, dtype={self.dtype.str!r}'
|
numcodecs/registry.py
CHANGED
numcodecs/tests/common.py
CHANGED
|
@@ -7,8 +7,7 @@ import numpy as np
|
|
|
7
7
|
import pytest
|
|
8
8
|
from numpy.testing import assert_array_almost_equal, assert_array_equal
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
from numcodecs import * # noqa: F403
|
|
10
|
+
from numcodecs import * # noqa: F403 # for eval to find names in repr tests
|
|
12
11
|
from numcodecs.compat import ensure_bytes, ensure_ndarray
|
|
13
12
|
from numcodecs.registry import get_codec
|
|
14
13
|
|
|
@@ -19,9 +18,9 @@ greetings = [
|
|
|
19
18
|
'Hei maailma!',
|
|
20
19
|
'Xin chào thế giới',
|
|
21
20
|
'Njatjeta Botë!',
|
|
22
|
-
'Γεια σου κόσμε!',
|
|
21
|
+
'Γεια σου κόσμε!', # noqa: RUF001
|
|
23
22
|
'こんにちは世界',
|
|
24
|
-
'世界,你好!',
|
|
23
|
+
'世界,你好!', # noqa: RUF001
|
|
25
24
|
'Helló, világ!',
|
|
26
25
|
'Zdravo svete!',
|
|
27
26
|
'เฮลโลเวิลด์',
|