numcodecs-combinators 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/.github/workflows/ci.yml +15 -2
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/PKG-INFO +2 -1
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/README.md +1 -0
- numcodecs_combinators-0.2.9/_typos.toml +2 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/docs/index.md +1 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/pyproject.toml +3 -2
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/__init__.py +3 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/abc.py +2 -0
- numcodecs_combinators-0.2.9/src/numcodecs_combinators/best.py +212 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/framed.py +2 -2
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/stack.py +46 -6
- numcodecs_combinators-0.2.9/tests/test_best.py +85 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/tests/test_stack.py +34 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/.github/workflows/publish.yml +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/.gitignore +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/.python-version +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/.readthedocs.yaml +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/LICENSE +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/docs/requirements.txt +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/mkdocs.yml +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/py.typed +0 -0
- {numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/tests/test_framed.py +0 -0
|
@@ -6,7 +6,10 @@ on:
|
|
|
6
6
|
- main
|
|
7
7
|
pull_request:
|
|
8
8
|
branches:
|
|
9
|
-
-
|
|
9
|
+
- "*"
|
|
10
|
+
|
|
11
|
+
env:
|
|
12
|
+
CLICOLOR: 1
|
|
10
13
|
|
|
11
14
|
jobs:
|
|
12
15
|
check:
|
|
@@ -21,6 +24,16 @@ jobs:
|
|
|
21
24
|
with:
|
|
22
25
|
args: check
|
|
23
26
|
|
|
27
|
+
spelling:
|
|
28
|
+
name: Spellcheck
|
|
29
|
+
runs-on: ubuntu-latest
|
|
30
|
+
steps:
|
|
31
|
+
- name: Checkout the Repository
|
|
32
|
+
uses: actions/checkout@v4
|
|
33
|
+
|
|
34
|
+
- name: Spellcheck repo
|
|
35
|
+
uses: crate-ci/typos@v1.32.0
|
|
36
|
+
|
|
24
37
|
fmt:
|
|
25
38
|
name: Formatting
|
|
26
39
|
runs-on: ubuntu-latest
|
|
@@ -50,7 +63,7 @@ jobs:
|
|
|
50
63
|
run: uv sync --all-extras --dev && uv pip install .
|
|
51
64
|
|
|
52
65
|
- name: Run tests
|
|
53
|
-
run: uv run pytest
|
|
66
|
+
run: uv run pytest -v -W error
|
|
54
67
|
|
|
55
68
|
mypy:
|
|
56
69
|
name: Typecheck
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: numcodecs-combinators
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.9
|
|
4
4
|
Summary: Combinator codecs for the `numcodecs` buffer compression API
|
|
5
5
|
License: Copyright (c) 2024, Juniper Tyree
|
|
6
6
|
|
|
@@ -402,6 +402,7 @@ The following combinators, implementing the `CodecCombinatorMixin` are provided:
|
|
|
402
402
|
|
|
403
403
|
- `CodecStack`: a stack of codecs
|
|
404
404
|
- `FramedCodecStack`: a stack of codecs that is framed with array data type and shape information
|
|
405
|
+
- `PickBestCodec`: pick the best codec to encode the data
|
|
405
406
|
|
|
406
407
|
[`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
|
|
407
408
|
|
|
@@ -12,6 +12,7 @@ The following combinators, implementing the `CodecCombinatorMixin` are provided:
|
|
|
12
12
|
|
|
13
13
|
- `CodecStack`: a stack of codecs
|
|
14
14
|
- `FramedCodecStack`: a stack of codecs that is framed with array data type and shape information
|
|
15
|
+
- `PickBestCodec`: pick the best codec to encode the data
|
|
15
16
|
|
|
16
17
|
[`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
|
|
17
18
|
|
|
@@ -12,6 +12,7 @@ The following combinators, implementing the [`CodecCombinatorMixin`][numcodecs_c
|
|
|
12
12
|
|
|
13
13
|
- [`CodecStack`][numcodecs_combinators.stack.CodecStack]: a stack of codecs
|
|
14
14
|
- [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]: a stack of codecs that is framed with array data type and shape information
|
|
15
|
+
- [`PickBestCodec`][numcodecs_combinators.best.PickBestCodec]: pick the best codec to encode the data
|
|
15
16
|
|
|
16
17
|
## Funding
|
|
17
18
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "numcodecs-combinators"
|
|
7
|
-
version = "0.2.7"
|
|
7
|
+
version = "0.2.9"
|
|
8
8
|
description = "Combinator codecs for the `numcodecs` buffer compression API"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -21,8 +21,9 @@ optional-dependencies.xarray = [ "xarray>=2024.06", "dask>=2024.6" ]
|
|
|
21
21
|
dev = ["mypy~=1.14", "pytest~=8.3"]
|
|
22
22
|
|
|
23
23
|
[project.entry-points."numcodecs.codecs"]
|
|
24
|
-
"combinators.stack" = "numcodecs_combinators.stack:CodecStack"
|
|
24
|
+
"combinators.best" = "numcodecs_combinators.best:PickBestCodec"
|
|
25
25
|
"combinators.framed" = "numcodecs_combinators.framed:FramedCodecStack"
|
|
26
|
+
"combinators.stack" = "numcodecs_combinators.stack:CodecStack"
|
|
26
27
|
|
|
27
28
|
[tool.setuptools.packages.find]
|
|
28
29
|
where = ["src"]
|
{numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/__init__.py
RENAMED
|
@@ -8,6 +8,8 @@ provided:
|
|
|
8
8
|
- [`CodecStack`][numcodecs_combinators.stack.CodecStack]: a stack of codecs
|
|
9
9
|
- [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]: a stack
|
|
10
10
|
of codecs that is framed with array data type and shape information
|
|
11
|
+
- [`PickBestCodec`][numcodecs_combinators.best.PickBestCodec]: pick the best
|
|
12
|
+
codec to encode the data
|
|
11
13
|
"""
|
|
12
14
|
|
|
13
15
|
__all__ = ["map_codec"]
|
|
@@ -18,6 +20,7 @@ from typing import Callable
|
|
|
18
20
|
from numcodecs.abc import Codec
|
|
19
21
|
|
|
20
22
|
from . import abc as abc
|
|
23
|
+
from . import best as best
|
|
21
24
|
from . import framed as framed
|
|
22
25
|
from . import stack as stack
|
|
23
26
|
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module defines the [`PickBestCodec`][numcodecs_combinators.best.PickBestCodec] class, which picks the codec that encoded the data best.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
__all__ = ["PickBestCodec"]
|
|
6
|
+
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
from typing import Callable, Optional
|
|
9
|
+
|
|
10
|
+
import numcodecs
|
|
11
|
+
import numcodecs.compat
|
|
12
|
+
import numcodecs.registry
|
|
13
|
+
import numpy as np
|
|
14
|
+
import varint
|
|
15
|
+
from numcodecs.abc import Codec
|
|
16
|
+
from typing_extensions import Buffer, Self # MSPV 3.12
|
|
17
|
+
|
|
18
|
+
from .abc import CodecCombinatorMixin
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PickBestCodec(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
22
|
+
"""
|
|
23
|
+
A codec that tries encoding with all combined codecs and then picks the one with the fewest bytes.
|
|
24
|
+
|
|
25
|
+
The inner codecs must all encode to 1D byte arrays. To use a codec not
|
|
26
|
+
encoding to bytes with this combinator, you can wrap it using
|
|
27
|
+
[`FramedCodecStack(codec)`][numcodecs_combinators.framed.FramedCodecStack]
|
|
28
|
+
combinator.
|
|
29
|
+
|
|
30
|
+
This combinator uses the ULEB128 variable length integer encoding to encode
|
|
31
|
+
the index of the codec that was chosen to encode and uses this index as a
|
|
32
|
+
header before the encoded bytes. The header index is only included if this
|
|
33
|
+
combinator wraps at least two codecs. If this combinator wraps zero codecs,
|
|
34
|
+
it passes the original data through unchanged.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
__slots__ = ()
|
|
38
|
+
|
|
39
|
+
codec_id: str = "combinators.best" # type: ignore
|
|
40
|
+
|
|
41
|
+
def __init__(self, *args: dict | Codec):
|
|
42
|
+
pass
|
|
43
|
+
|
|
44
|
+
def __new__(cls, *args: dict | Codec) -> Self:
|
|
45
|
+
return super(PickBestCodec, cls).__new__(
|
|
46
|
+
cls,
|
|
47
|
+
tuple(
|
|
48
|
+
codec
|
|
49
|
+
if isinstance(codec, Codec)
|
|
50
|
+
else numcodecs.registry.get_codec(codec)
|
|
51
|
+
for codec in args
|
|
52
|
+
),
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
def encode(self, buf: Buffer) -> bytes:
|
|
56
|
+
"""Encode the data in `buf`.
|
|
57
|
+
|
|
58
|
+
Parameters
|
|
59
|
+
----------
|
|
60
|
+
buf : Buffer
|
|
61
|
+
Data to be encoded. May be any object supporting the new-style
|
|
62
|
+
buffer protocol.
|
|
63
|
+
|
|
64
|
+
Returns
|
|
65
|
+
-------
|
|
66
|
+
enc : bytes
|
|
67
|
+
Encoded data as a bytestring.
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
if len(self) == 0:
|
|
71
|
+
return buf
|
|
72
|
+
|
|
73
|
+
data = numcodecs.compat.ensure_ndarray(buf)
|
|
74
|
+
|
|
75
|
+
best_size = np.inf
|
|
76
|
+
best_index = None
|
|
77
|
+
best_encoded = None
|
|
78
|
+
|
|
79
|
+
for i, codec in enumerate(self):
|
|
80
|
+
encoded = numcodecs.compat.ensure_ndarray(codec.encode(np.copy(data)))
|
|
81
|
+
assert encoded.dtype == np.dtype("uint8"), (
|
|
82
|
+
f"codec best[{i}] must encode to bytes"
|
|
83
|
+
)
|
|
84
|
+
assert encoded.ndim <= 1, f"codec best[{i}] must encode to 1D bytes"
|
|
85
|
+
|
|
86
|
+
if encoded.nbytes < best_size:
|
|
87
|
+
best_size = encoded.nbytes
|
|
88
|
+
best_index = i
|
|
89
|
+
best_encoded = encoded
|
|
90
|
+
|
|
91
|
+
encoded_index = varint.encode(best_index)
|
|
92
|
+
encoded_bytes = numcodecs.compat.ensure_bytes(best_encoded)
|
|
93
|
+
|
|
94
|
+
if len(self) == 1:
|
|
95
|
+
return encoded_bytes
|
|
96
|
+
|
|
97
|
+
return encoded_index + encoded_bytes
|
|
98
|
+
|
|
99
|
+
def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
|
|
100
|
+
"""Decode the data in `buf`.
|
|
101
|
+
|
|
102
|
+
Parameters
|
|
103
|
+
----------
|
|
104
|
+
buf : Buffer
|
|
105
|
+
Encoded data. Must be an object representing a bytestring, e.g.
|
|
106
|
+
[`bytes`][bytes] or a 1D array of [`np.uint8`][numpy.uint8]s etc.
|
|
107
|
+
out : Buffer, optional
|
|
108
|
+
Writeable buffer to store decoded data. N.B. if provided, this buffer must
|
|
109
|
+
be exactly the right size to store the decoded data.
|
|
110
|
+
|
|
111
|
+
Returns
|
|
112
|
+
-------
|
|
113
|
+
dec : Buffer
|
|
114
|
+
Decoded data. May be any object supporting the new-style
|
|
115
|
+
buffer protocol.
|
|
116
|
+
"""
|
|
117
|
+
|
|
118
|
+
if len(self) == 0:
|
|
119
|
+
return numcodecs.compat.ndarray_copy(buf, out)
|
|
120
|
+
|
|
121
|
+
b = numcodecs.compat.ensure_bytes(buf)
|
|
122
|
+
b_io = BytesIO(b)
|
|
123
|
+
|
|
124
|
+
if len(self) == 1:
|
|
125
|
+
best_index = 0
|
|
126
|
+
else:
|
|
127
|
+
best_index = varint.decode_stream(b_io)
|
|
128
|
+
|
|
129
|
+
return self[best_index].decode(b_io.read(), out=out)
|
|
130
|
+
|
|
131
|
+
def get_config(self) -> dict:
|
|
132
|
+
"""
|
|
133
|
+
Returns the configuration of the best codec combinator.
|
|
134
|
+
|
|
135
|
+
[`numcodecs.registry.get_codec(config)`][numcodecs.registry.get_codec]
|
|
136
|
+
can be used to reconstruct this combinator from the returned config.
|
|
137
|
+
|
|
138
|
+
Returns
|
|
139
|
+
-------
|
|
140
|
+
config : dict
|
|
141
|
+
Configuration of the best codec combinator.
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
return dict(
|
|
145
|
+
id=type(self).codec_id,
|
|
146
|
+
codecs=tuple(codec.get_config() for codec in self),
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
@classmethod
|
|
150
|
+
def from_config(cls, config: dict) -> Self:
|
|
151
|
+
"""
|
|
152
|
+
Instantiate the best codec combinator from a configuration [`dict`][dict].
|
|
153
|
+
|
|
154
|
+
Parameters
|
|
155
|
+
----------
|
|
156
|
+
config : dict
|
|
157
|
+
Configuration of the best codec combinator.
|
|
158
|
+
|
|
159
|
+
Returns
|
|
160
|
+
-------
|
|
161
|
+
best : PickBestCodec
|
|
162
|
+
Instantiated best codec combinator.
|
|
163
|
+
"""
|
|
164
|
+
|
|
165
|
+
return cls(*config["codecs"])
|
|
166
|
+
|
|
167
|
+
def __repr__(self) -> str:
|
|
168
|
+
repr = ", ".join(f"{codec!r}" for codec in self)
|
|
169
|
+
|
|
170
|
+
return f"{type(self).__name__}({repr})"
|
|
171
|
+
|
|
172
|
+
def map(self, mapper: Callable[[Codec], Codec]) -> "PickBestCodec":
|
|
173
|
+
"""
|
|
174
|
+
Apply the `mapper` to all codecs that are in this combinator.
|
|
175
|
+
In the returned combinator, each codec is replaced by its mapped codec.
|
|
176
|
+
|
|
177
|
+
The `mapper` should recursively apply itself to any inner codecs that
|
|
178
|
+
also implement the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin]
|
|
179
|
+
mixin.
|
|
180
|
+
|
|
181
|
+
To automatically handle the recursive application as a caller, you can
|
|
182
|
+
use
|
|
183
|
+
```python
|
|
184
|
+
numcodecs_combinators.map_codec(best, mapper)
|
|
185
|
+
```
|
|
186
|
+
instead.
|
|
187
|
+
|
|
188
|
+
Parameters
|
|
189
|
+
----------
|
|
190
|
+
mapper : Callable[[Codec], Codec]
|
|
191
|
+
The callable that should be applied to each codec to map over this
|
|
192
|
+
best codec combinator.
|
|
193
|
+
|
|
194
|
+
Returns
|
|
195
|
+
-------
|
|
196
|
+
mapped : PickBestCodec
|
|
197
|
+
The mapped best codec combinator.
|
|
198
|
+
"""
|
|
199
|
+
|
|
200
|
+
return PickBestCodec(*map(mapper, self))
|
|
201
|
+
|
|
202
|
+
def __add__(self, other) -> "PickBestCodec":
|
|
203
|
+
return PickBestCodec(*tuple.__add__(self, other))
|
|
204
|
+
|
|
205
|
+
def __mul__(self, other) -> "PickBestCodec":
|
|
206
|
+
return PickBestCodec(*tuple.__mul__(self, other))
|
|
207
|
+
|
|
208
|
+
def __rmul__(self, other) -> "PickBestCodec":
|
|
209
|
+
return PickBestCodec(*tuple.__rmul__(self, other))
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
numcodecs.registry.register_codec(PickBestCodec)
|
{numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/framed.py
RENAMED
|
@@ -28,8 +28,8 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
28
28
|
intermediary, encoded) are stored as part of the encoding, which is output
|
|
29
29
|
as a bytestring.
|
|
30
30
|
|
|
31
|
-
On
|
|
32
|
-
right to left to decode into known
|
|
31
|
+
On decoding, this framing information is used to apply the codecs from
|
|
32
|
+
right to left to decode into known output data types and shapes.
|
|
33
33
|
|
|
34
34
|
Therefore, the [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]
|
|
35
35
|
can be used to combine codecs which require knowing the output data type
|
{numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/stack.py
RENAMED
|
@@ -140,6 +140,8 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
140
140
|
buffer protocol.
|
|
141
141
|
"""
|
|
142
142
|
|
|
143
|
+
chunked = getattr(buf, "chunked", False)
|
|
144
|
+
|
|
143
145
|
encoded = np.asarray(
|
|
144
146
|
numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
|
|
145
147
|
)
|
|
@@ -149,16 +151,23 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
149
151
|
silhouettes.append((encoded.shape, encoded.dtype))
|
|
150
152
|
encoded = np.asarray(
|
|
151
153
|
numcodecs.compat.ensure_contiguous_ndarray_like(
|
|
152
|
-
codec.encode((encoded)),
|
|
154
|
+
codec.encode(_MaybeChunkedNdArray(encoded) if chunked else encoded),
|
|
155
|
+
flatten=False,
|
|
153
156
|
)
|
|
154
157
|
)
|
|
155
158
|
|
|
156
|
-
decoded = encoded
|
|
159
|
+
decoded = encoded.view(np.ndarray)
|
|
157
160
|
|
|
158
161
|
for codec in reversed(self):
|
|
159
162
|
shape, dtype = silhouettes.pop()
|
|
160
163
|
out = np.empty(shape=shape, dtype=dtype)
|
|
161
|
-
decoded =
|
|
164
|
+
decoded = (
|
|
165
|
+
codec.decode(decoded, _MaybeChunkedNdArray(out) if chunked else out)
|
|
166
|
+
.view(dtype)
|
|
167
|
+
.reshape(shape)
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
decoded = decoded.view(np.ndarray)
|
|
162
171
|
|
|
163
172
|
if isinstance(decoded, type(buf)):
|
|
164
173
|
return decoded
|
|
@@ -167,7 +176,15 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
167
176
|
|
|
168
177
|
def encode_decode_data_array(self, da: "xr.DataArray") -> "xr.DataArray":
|
|
169
178
|
"""
|
|
170
|
-
Encode, then decode the data array
|
|
179
|
+
Encode, then decode the data array `da`. If `da` is chunked, each chunk
|
|
180
|
+
is encoded and decoded *independently*.
|
|
181
|
+
|
|
182
|
+
Since each chunk is encoded *independently*, this method may cause
|
|
183
|
+
chunk boundary artifacts. Do *not* use this method if the codec
|
|
184
|
+
requires access to the entire data at once or if it needs to access
|
|
185
|
+
a neighbourhood of points across the chunk boundary. In these cases,
|
|
186
|
+
it is preferable to use
|
|
187
|
+
`da.copy(data=stack.encode_decode(da.values))` instead.
|
|
171
188
|
|
|
172
189
|
The encode-decode computation may be deferred until the
|
|
173
190
|
[`compute`][xarray.DataArray.compute] method is called on the result.
|
|
@@ -188,6 +205,8 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
188
205
|
|
|
189
206
|
import xarray as xr
|
|
190
207
|
|
|
208
|
+
chunked = da.chunks is not None
|
|
209
|
+
|
|
191
210
|
def encode_decode_data_array_single_chunk(
|
|
192
211
|
da: xr.DataArray,
|
|
193
212
|
) -> xr.DataArray:
|
|
@@ -198,9 +217,11 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
198
217
|
return da.copy(deep=False).chunk(single_chunk)
|
|
199
218
|
|
|
200
219
|
# eagerly compute the input chunk and encode and decode it
|
|
201
|
-
decoded = self.encode_decode(da.values) # type: ignore
|
|
220
|
+
decoded = self.encode_decode(_MaybeChunkedNdArray(da.values, chunked)) # type: ignore
|
|
202
221
|
|
|
203
|
-
return da.copy(deep=False, data=decoded).chunk(single_chunk)
|
|
222
|
+
return da.copy(deep=False, data=np.array(decoded).view(np.ndarray)).chunk(
|
|
223
|
+
single_chunk
|
|
224
|
+
)
|
|
204
225
|
|
|
205
226
|
return xr.map_blocks(encode_decode_data_array_single_chunk, da)
|
|
206
227
|
|
|
@@ -286,3 +307,22 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
|
|
|
286
307
|
|
|
287
308
|
|
|
288
309
|
numcodecs.registry.register_codec(CodecStack)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class _MaybeChunkedNdArray(np.ndarray):
|
|
313
|
+
__slots__ = ("_chunked",)
|
|
314
|
+
_chunked: bool
|
|
315
|
+
|
|
316
|
+
def __new__(cls, array, chunked: bool = True):
|
|
317
|
+
obj = np.asarray(array).view(cls)
|
|
318
|
+
obj._chunked = chunked
|
|
319
|
+
return obj
|
|
320
|
+
|
|
321
|
+
def __array_finalize__(self, obj):
|
|
322
|
+
if obj is None:
|
|
323
|
+
return
|
|
324
|
+
self._chunked = getattr(obj, "chunked", True)
|
|
325
|
+
|
|
326
|
+
@property
|
|
327
|
+
def chunked(self) -> bool:
|
|
328
|
+
return self._chunked
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import numcodecs
|
|
2
|
+
import numcodecs.compat
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
import numcodecs_combinators
|
|
6
|
+
from numcodecs_combinators.best import PickBestCodec
|
|
7
|
+
from numcodecs_combinators.framed import FramedCodecStack
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def assert_config_roundtrip(codec: numcodecs.abc.Codec):
|
|
11
|
+
config = codec.get_config()
|
|
12
|
+
codec2 = numcodecs.get_codec(config)
|
|
13
|
+
assert codec2 == codec
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_init_config():
|
|
17
|
+
best = PickBestCodec()
|
|
18
|
+
assert len(best) == 0
|
|
19
|
+
assert_config_roundtrip(best)
|
|
20
|
+
|
|
21
|
+
best = PickBestCodec(dict(id="zlib", level=9))
|
|
22
|
+
assert len(best) == 1
|
|
23
|
+
assert_config_roundtrip(best)
|
|
24
|
+
|
|
25
|
+
best = PickBestCodec(dict(id="zlib", level=9), numcodecs.CRC32())
|
|
26
|
+
assert len(best) == 2
|
|
27
|
+
assert_config_roundtrip(best)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_encode_decode():
|
|
31
|
+
for best in [
|
|
32
|
+
PickBestCodec(),
|
|
33
|
+
PickBestCodec(dict(id="combinators.framed", codecs=[dict(id="zlib", level=9)])),
|
|
34
|
+
PickBestCodec(
|
|
35
|
+
FramedCodecStack(numcodecs.Zlib(level=9)),
|
|
36
|
+
FramedCodecStack(numcodecs.CRC32()),
|
|
37
|
+
),
|
|
38
|
+
PickBestCodec(
|
|
39
|
+
FramedCodecStack(numcodecs.Zlib(level=9)),
|
|
40
|
+
FramedCodecStack(numcodecs.CRC32()),
|
|
41
|
+
FramedCodecStack(numcodecs.Zstd(level=20)),
|
|
42
|
+
),
|
|
43
|
+
]:
|
|
44
|
+
for data in [
|
|
45
|
+
np.zeros(shape=(0,)),
|
|
46
|
+
np.array(3),
|
|
47
|
+
np.array([97, 98, 99], dtype=np.uint8),
|
|
48
|
+
np.linspace(1, 100, 100).reshape(10, 10),
|
|
49
|
+
np.linspace(1, 100, 100).reshape(10, 10).byteswap(),
|
|
50
|
+
]:
|
|
51
|
+
encoded = best.encode(data)
|
|
52
|
+
if len(best) > 0:
|
|
53
|
+
assert isinstance(encoded, bytes)
|
|
54
|
+
decoded = best.decode(encoded)
|
|
55
|
+
print(best)
|
|
56
|
+
assert np.all(decoded == data)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_map():
|
|
60
|
+
best = PickBestCodec(numcodecs.Zlib(level=9), numcodecs.CRC32())
|
|
61
|
+
|
|
62
|
+
mapped = numcodecs_combinators.map_codec(best, lambda c: c)
|
|
63
|
+
assert mapped == best
|
|
64
|
+
|
|
65
|
+
mapped = numcodecs_combinators.map_codec(best, lambda c: PickBestCodec(c))
|
|
66
|
+
assert mapped == PickBestCodec(
|
|
67
|
+
PickBestCodec(
|
|
68
|
+
PickBestCodec(numcodecs.Zlib(level=9)),
|
|
69
|
+
PickBestCodec(numcodecs.CRC32()),
|
|
70
|
+
)
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
mapped = numcodecs_combinators.map_codec(mapped, lambda c: PickBestCodec(c))
|
|
74
|
+
assert mapped == PickBestCodec(
|
|
75
|
+
PickBestCodec(
|
|
76
|
+
PickBestCodec(
|
|
77
|
+
PickBestCodec(
|
|
78
|
+
PickBestCodec(
|
|
79
|
+
PickBestCodec(PickBestCodec(numcodecs.Zlib(level=9)))
|
|
80
|
+
),
|
|
81
|
+
PickBestCodec(PickBestCodec(PickBestCodec(numcodecs.CRC32()))),
|
|
82
|
+
)
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import numcodecs
|
|
2
2
|
import numpy as np
|
|
3
3
|
import xarray as xr
|
|
4
|
+
from numcodecs.abc import Codec
|
|
4
5
|
|
|
5
6
|
import numcodecs_combinators
|
|
6
7
|
from numcodecs_combinators.stack import CodecStack
|
|
@@ -51,6 +52,39 @@ def test_encode_decode():
|
|
|
51
52
|
assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
|
|
52
53
|
|
|
53
54
|
|
|
55
|
+
def test_chunked_encode_decode():
|
|
56
|
+
class CheckChunkedCodec(Codec):
|
|
57
|
+
__slots__ = ("is_chunked",)
|
|
58
|
+
is_chunked: bool
|
|
59
|
+
|
|
60
|
+
def __init__(self, is_chunked: bool):
|
|
61
|
+
self.is_chunked = is_chunked
|
|
62
|
+
|
|
63
|
+
def encode(self, buf):
|
|
64
|
+
assert getattr(buf, "chunked", False) == self.is_chunked
|
|
65
|
+
return buf
|
|
66
|
+
|
|
67
|
+
def decode(self, buf, out=None):
|
|
68
|
+
assert getattr(buf, "chunked", False) is False
|
|
69
|
+
assert getattr(out, "chunked", False) == self.is_chunked
|
|
70
|
+
return numcodecs.compat.ndarray_copy(buf, out)
|
|
71
|
+
|
|
72
|
+
stack = CodecStack(CheckChunkedCodec(False))
|
|
73
|
+
|
|
74
|
+
encoded_decoded = stack.encode_decode(np.array([1.0, 2.0, 3.0]))
|
|
75
|
+
assert np.all(encoded_decoded == np.array([1.0, 2.0, 3.0]))
|
|
76
|
+
|
|
77
|
+
encoded_decoded = stack.encode_decode_data_array(xr.DataArray([1.0, 2.0, 3.0]))
|
|
78
|
+
assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
|
|
79
|
+
|
|
80
|
+
stack = CodecStack(CheckChunkedCodec(True))
|
|
81
|
+
|
|
82
|
+
encoded_decoded = stack.encode_decode_data_array(
|
|
83
|
+
xr.DataArray([1.0, 2.0, 3.0]).chunk(1)
|
|
84
|
+
)
|
|
85
|
+
assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
|
|
86
|
+
|
|
87
|
+
|
|
54
88
|
def test_map():
|
|
55
89
|
stack = CodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32())
|
|
56
90
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{numcodecs_combinators-0.2.7 → numcodecs_combinators-0.2.9}/src/numcodecs_combinators/py.typed
RENAMED
|
File without changes
|
|
File without changes
|