numcodecs-combinators 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21) hide show
  1. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/.github/workflows/ci.yml +15 -2
  2. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/PKG-INFO +4 -1
  3. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/README.md +2 -0
  4. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/docs/index.md +2 -0
  5. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/mkdocs.yml +4 -0
  6. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/pyproject.toml +12 -2
  7. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/src/numcodecs_combinators/__init__.py +9 -1
  8. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/src/numcodecs_combinators/abc.py +1 -2
  9. numcodecs_combinators-0.2.8/src/numcodecs_combinators/best.py +212 -0
  10. numcodecs_combinators-0.2.8/src/numcodecs_combinators/framed.py +260 -0
  11. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/src/numcodecs_combinators/stack.py +19 -8
  12. numcodecs_combinators-0.2.8/tests/test_best.py +85 -0
  13. numcodecs_combinators-0.2.8/tests/test_framed.py +75 -0
  14. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/.github/workflows/publish.yml +0 -0
  15. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/.gitignore +0 -0
  16. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/.python-version +0 -0
  17. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/.readthedocs.yaml +0 -0
  18. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/LICENSE +0 -0
  19. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/docs/requirements.txt +0 -0
  20. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/src/numcodecs_combinators/py.typed +0 -0
  21. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.8}/tests/test_stack.py +0 -0
@@ -6,7 +6,10 @@ on:
6
6
  - main
7
7
  pull_request:
8
8
  branches:
9
- - '*'
9
+ - "*"
10
+
11
+ env:
12
+ CLICOLOR: 1
10
13
 
11
14
  jobs:
12
15
  check:
@@ -21,6 +24,16 @@ jobs:
21
24
  with:
22
25
  args: check
23
26
 
27
+ spelling:
28
+ name: Spellcheck
29
+ runs-on: ubuntu-latest
30
+ steps:
31
+ - name: Checkout the Repository
32
+ uses: actions/checkout@v4
33
+
34
+ - name: Spellcheck repo
35
+ uses: crate-ci/typos@v1.32.0
36
+
24
37
  fmt:
25
38
  name: Formatting
26
39
  runs-on: ubuntu-latest
@@ -50,7 +63,7 @@ jobs:
50
63
  run: uv sync --all-extras --dev && uv pip install .
51
64
 
52
65
  - name: Run tests
53
- run: uv run pytest
66
+ run: uv run pytest -v -W error
54
67
 
55
68
  mypy:
56
69
  name: Typecheck
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numcodecs-combinators
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: Combinator codecs for the `numcodecs` buffer compression API
5
5
  License: Copyright (c) 2024, Juniper Tyree
6
6
 
@@ -382,6 +382,7 @@ Requires-Python: >=3.10
382
382
  Requires-Dist: numcodecs<0.17,>=0.13.0
383
383
  Requires-Dist: numpy~=2.0
384
384
  Requires-Dist: typing-extensions~=4.6
385
+ Requires-Dist: varint~=1.0
385
386
  Provides-Extra: xarray
386
387
  Requires-Dist: dask>=2024.6; extra == 'xarray'
387
388
  Requires-Dist: xarray>=2024.06; extra == 'xarray'
@@ -400,6 +401,8 @@ Combinator codecs for the [`numcodecs`] buffer compression API.
400
401
  The following combinators, implementing the `CodecCombinatorMixin` are provided:
401
402
 
402
403
  - `CodecStack`: a stack of codecs
404
+ - `FramedCodecStack`: a stack of codecs that is framed with array data type and shape information
405
+ - `PickBestCodec`: pick the best codec to encode the data
403
406
 
404
407
  [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
405
408
 
@@ -11,6 +11,8 @@ Combinator codecs for the [`numcodecs`] buffer compression API.
11
11
  The following combinators, implementing the `CodecCombinatorMixin` are provided:
12
12
 
13
13
  - `CodecStack`: a stack of codecs
14
+ - `FramedCodecStack`: a stack of codecs that is framed with array data type and shape information
15
+ - `PickBestCodec`: pick the best codec to encode the data
14
16
 
15
17
  [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
16
18
 
@@ -11,6 +11,8 @@ Combinator codecs for the [`numcodecs`][numcodecs] buffer compression API.
11
11
  The following combinators, implementing the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin] are provided:
12
12
 
13
13
  - [`CodecStack`][numcodecs_combinators.stack.CodecStack]: a stack of codecs
14
+ - [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]: a stack of codecs that is framed with array data type and shape information
15
+ - [`PickBestCodec`][numcodecs_combinators.best.PickBestCodec]: pick the best codec to encode the data
14
16
 
15
17
  ## Funding
16
18
 
@@ -54,5 +54,9 @@ plugins:
54
54
  modules: true
55
55
  inventories:
56
56
  - https://docs.python.org/3.12/objects.inv
57
+ - https://numpy.org/doc/2.2/objects.inv
57
58
  - https://numcodecs.readthedocs.io/en/v0.15.0/objects.inv
58
59
  - https://docs.xarray.dev/en/v2025.01.0/objects.inv
60
+
61
+ watch:
62
+ - src
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "numcodecs-combinators"
7
- version = "0.2.6"
7
+ version = "0.2.8"
8
8
  description = "Combinator codecs for the `numcodecs` buffer compression API"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -13,6 +13,7 @@ dependencies = [
13
13
  "numcodecs>=0.13.0,<0.17",
14
14
  "numpy~=2.0",
15
15
  "typing-extensions~=4.6",
16
+ "varint~=1.0",
16
17
  ]
17
18
  optional-dependencies.xarray = [ "xarray>=2024.06", "dask>=2024.6" ]
18
19
 
@@ -20,6 +21,8 @@ optional-dependencies.xarray = [ "xarray>=2024.06", "dask>=2024.6" ]
20
21
  dev = ["mypy~=1.14", "pytest~=8.3"]
21
22
 
22
23
  [project.entry-points."numcodecs.codecs"]
24
+ "combinators.best" = "numcodecs_combinators.best:PickBestCodec"
25
+ "combinators.framed" = "numcodecs_combinators.framed:FramedCodecStack"
23
26
  "combinators.stack" = "numcodecs_combinators.stack:CodecStack"
24
27
 
25
28
  [tool.setuptools.packages.find]
@@ -28,6 +31,13 @@ where = ["src"]
28
31
  [tool.setuptools.package-data]
29
32
  "numcodecs_combinators" = ["py.typed"]
30
33
 
34
+ [tool.ruff.lint]
35
+ select = ["E4", "E7", "E9", "F", "I"]
36
+
37
+ [tool.pytest.ini_options]
38
+ addopts = ["--import-mode=importlib"]
39
+ xfail_strict = true
40
+
31
41
  [[tool.mypy.overrides]]
32
- module = ["numcodecs.*"]
42
+ module = ["numcodecs.*", "varint.*"]
33
43
  follow_untyped_imports = true
@@ -1,9 +1,15 @@
1
1
  """
2
2
  Combinator codecs for the [`numcodecs`][numcodecs] buffer compression API.
3
3
 
4
- The following combinators, implementing the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin] are provided:
4
+ The following combinators, implementing the
5
+ [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin] are
6
+ provided:
5
7
 
6
8
  - [`CodecStack`][numcodecs_combinators.stack.CodecStack]: a stack of codecs
9
+ - [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]: a stack
10
+ of codecs that is framed with array data type and shape information
11
+ - [`PickBestCodec`][numcodecs_combinators.best.PickBestCodec]: pick the best
12
+ codec to encode the data
7
13
  """
8
14
 
9
15
  __all__ = ["map_codec"]
@@ -14,6 +20,8 @@ from typing import Callable
14
20
  from numcodecs.abc import Codec
15
21
 
16
22
  from . import abc as abc
23
+ from . import best as best
24
+ from . import framed as framed
17
25
  from . import stack as stack
18
26
 
19
27
 
@@ -4,12 +4,11 @@ This module defines the [`CodecCombinatorMixin`][numcodecs_combinators.abc.Codec
4
4
 
5
5
  __all__ = ["CodecCombinatorMixin"]
6
6
 
7
+ from abc import ABC, abstractmethod
7
8
  from typing import Callable
8
9
 
9
10
  from numcodecs.abc import Codec
10
11
 
11
- from abc import ABC, abstractmethod
12
-
13
12
 
14
13
  class CodecCombinatorMixin(ABC):
15
14
  """
@@ -0,0 +1,212 @@
1
+ """
2
+ This module defines the [`PickBestCodec`][numcodecs_combinators.best.PickBestCodec] class, which picks the codec that encoded the data best.
3
+ """
4
+
5
+ __all__ = ["PickBestCodec"]
6
+
7
+ from io import BytesIO
8
+ from typing import Callable, Optional
9
+
10
+ import numcodecs
11
+ import numcodecs.compat
12
+ import numcodecs.registry
13
+ import numpy as np
14
+ import varint
15
+ from numcodecs.abc import Codec
16
+ from typing_extensions import Buffer, Self # MSPV 3.12
17
+
18
+ from .abc import CodecCombinatorMixin
19
+
20
+
21
+ class PickBestCodec(Codec, CodecCombinatorMixin, tuple[Codec]):
22
+ """
23
+ A codec that tries encoding with all combined codecs and then picks the one with the fewest bytes.
24
+
25
+ The inner codecs must all encode to 1D byte arrays. To use a codec not
26
+ encoding to bytes with this combinator, you can wrap it using the
27
+ [`FramedCodecStack(codec)`][numcodecs_combinators.framed.FramedCodecStack]
28
+ combinator.
29
+
30
+ This combinator uses the ULEB128 variable length integer encoding to encode
31
+ the index of the codec that was chosen to encode and uses this index as a
32
+ header before the encoded bytes. The header index is only included if this
33
+ combinator wraps at least two codecs. If this combinator wraps zero codecs,
34
+ it passes the original data through unchanged.
35
+ """
36
+
37
+ __slots__ = ()
38
+
39
+ codec_id: str = "combinators.best" # type: ignore
40
+
41
+ def __init__(self, *args: dict | Codec):
42
+ pass
43
+
44
+ def __new__(cls, *args: dict | Codec) -> Self:
45
+ return super(PickBestCodec, cls).__new__(
46
+ cls,
47
+ tuple(
48
+ codec
49
+ if isinstance(codec, Codec)
50
+ else numcodecs.registry.get_codec(codec)
51
+ for codec in args
52
+ ),
53
+ )
54
+
55
+ def encode(self, buf: Buffer) -> bytes:
56
+ """Encode the data in `buf`.
57
+
58
+ Parameters
59
+ ----------
60
+ buf : Buffer
61
+ Data to be encoded. May be any object supporting the new-style
62
+ buffer protocol.
63
+
64
+ Returns
65
+ -------
66
+ enc : bytes
67
+ Encoded data as a bytestring.
68
+ """
69
+
70
+ if len(self) == 0:
71
+ return buf
72
+
73
+ data = numcodecs.compat.ensure_ndarray(buf)
74
+
75
+ best_size = np.inf
76
+ best_index = None
77
+ best_encoded = None
78
+
79
+ for i, codec in enumerate(self):
80
+ encoded = numcodecs.compat.ensure_ndarray(codec.encode(np.copy(data)))
81
+ assert encoded.dtype == np.dtype("uint8"), (
82
+ f"codec best[{i}] must encode to bytes"
83
+ )
84
+ assert encoded.ndim <= 1, f"codec best[{i}] must encode to 1D bytes"
85
+
86
+ if encoded.nbytes < best_size:
87
+ best_size = encoded.nbytes
88
+ best_index = i
89
+ best_encoded = encoded
90
+
91
+ encoded_index = varint.encode(best_index)
92
+ encoded_bytes = numcodecs.compat.ensure_bytes(best_encoded)
93
+
94
+ if len(self) == 1:
95
+ return encoded_bytes
96
+
97
+ return encoded_index + encoded_bytes
98
+
99
+ def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
100
+ """Decode the data in `buf`.
101
+
102
+ Parameters
103
+ ----------
104
+ buf : Buffer
105
+ Encoded data. Must be an object representing a bytestring, e.g.
106
+ [`bytes`][bytes] or a 1D array of [`np.uint8`][numpy.uint8]s etc.
107
+ out : Buffer, optional
108
+ Writeable buffer to store decoded data. N.B. if provided, this buffer must
109
+ be exactly the right size to store the decoded data.
110
+
111
+ Returns
112
+ -------
113
+ dec : Buffer
114
+ Decoded data. May be any object supporting the new-style
115
+ buffer protocol.
116
+ """
117
+
118
+ if len(self) == 0:
119
+ return numcodecs.compat.ndarray_copy(buf, out)
120
+
121
+ b = numcodecs.compat.ensure_bytes(buf)
122
+ b_io = BytesIO(b)
123
+
124
+ if len(self) == 1:
125
+ best_index = 0
126
+ else:
127
+ best_index = varint.decode_stream(b_io)
128
+
129
+ return self[best_index].decode(b_io.read(), out=out)
130
+
131
+ def get_config(self) -> dict:
132
+ """
133
+ Returns the configuration of the best codec combinator.
134
+
135
+ [`numcodecs.registry.get_codec(config)`][numcodecs.registry.get_codec]
136
+ can be used to reconstruct this combinator from the returned config.
137
+
138
+ Returns
139
+ -------
140
+ config : dict
141
+ Configuration of the best codec combinator.
142
+ """
143
+
144
+ return dict(
145
+ id=type(self).codec_id,
146
+ codecs=tuple(codec.get_config() for codec in self),
147
+ )
148
+
149
+ @classmethod
150
+ def from_config(cls, config: dict) -> Self:
151
+ """
152
+ Instantiate the best codec combinator from a configuration [`dict`][dict].
153
+
154
+ Parameters
155
+ ----------
156
+ config : dict
157
+ Configuration of the best codec combinator.
158
+
159
+ Returns
160
+ -------
161
+ best : PickBestCodec
162
+ Instantiated best codec combinator.
163
+ """
164
+
165
+ return cls(*config["codecs"])
166
+
167
+ def __repr__(self) -> str:
168
+ repr = ", ".join(f"{codec!r}" for codec in self)
169
+
170
+ return f"{type(self).__name__}({repr})"
171
+
172
+ def map(self, mapper: Callable[[Codec], Codec]) -> "PickBestCodec":
173
+ """
174
+ Apply the `mapper` to all codecs that are in this combinator.
175
+ In the returned combinator, each codec is replaced by its mapped codec.
176
+
177
+ The `mapper` should recursively apply itself to any inner codecs that
178
+ also implement the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin]
179
+ mixin.
180
+
181
+ To automatically handle the recursive application as a caller, you can
182
+ use
183
+ ```python
184
+ numcodecs_combinators.map_codec(best, mapper)
185
+ ```
186
+ instead.
187
+
188
+ Parameters
189
+ ----------
190
+ mapper : Callable[[Codec], Codec]
191
+ The callable that should be applied to each codec to map over this
192
+ best codec combinator.
193
+
194
+ Returns
195
+ -------
196
+ mapped : PickBestCodec
197
+ The mapped best codec combinator.
198
+ """
199
+
200
+ return PickBestCodec(*map(mapper, self))
201
+
202
+ def __add__(self, other) -> "PickBestCodec":
203
+ return PickBestCodec(*tuple.__add__(self, other))
204
+
205
+ def __mul__(self, other) -> "PickBestCodec":
206
+ return PickBestCodec(*tuple.__mul__(self, other))
207
+
208
+ def __rmul__(self, other) -> "PickBestCodec":
209
+ return PickBestCodec(*tuple.__rmul__(self, other))
210
+
211
+
212
+ numcodecs.registry.register_codec(PickBestCodec)
@@ -0,0 +1,260 @@
1
+ """
2
+ This module defines the [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack] class, which exposes a framed stack of codecs as a combined codec.
3
+ """
4
+
5
+ __all__ = ["FramedCodecStack"]
6
+
7
+ from io import BytesIO
8
+ from sys import byteorder
9
+ from typing import Callable, Optional
10
+
11
+ import numcodecs
12
+ import numcodecs.compat
13
+ import numcodecs.registry
14
+ import numpy as np
15
+ import varint
16
+ from numcodecs.abc import Codec
17
+ from typing_extensions import Buffer, Self # MSPV 3.12
18
+
19
+ from .abc import CodecCombinatorMixin
20
+
21
+
22
+ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
23
+ """
24
+ A framed stack of codecs, which makes up a combined codec.
25
+
26
+ On encoding, the result of applying the codecs from left to right to encode
27
+ is framed s.t. the data types and shapes of all arrays (input,
28
+ intermediary, encoded) are stored as part of the encoding, which is output
29
+ as a bytestring.
30
+
31
+ On decoding, this framing information is used to apply the codecs from
32
+ right to left to decode into known output data types and shapes.
33
+
34
+ Therefore, the [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]
35
+ can be used to combine codecs which require knowing the output data type
36
+ and shape during decoding. It can also be used to encode arrays into
37
+ bytestrings.
38
+
39
+ Unlike the [`CodecStack`][numcodecs_combinators.stack.CodecStack], this
40
+ class does *not* provide an additional `encode_decode(buf)` method, since
41
+ it is equivalent to `framed.decode(framed.encode(buf))` due to the framing.
42
+ """
43
+
44
+ __slots__ = ()
45
+
46
+ codec_id: str = "combinators.framed" # type: ignore
47
+
48
+ def __init__(self, *args: dict | Codec):
49
+ pass
50
+
51
+ def __new__(cls, *args: dict | Codec) -> Self:
52
+ return super(FramedCodecStack, cls).__new__(
53
+ cls,
54
+ tuple(
55
+ codec
56
+ if isinstance(codec, Codec)
57
+ else numcodecs.registry.get_codec(codec)
58
+ for codec in args
59
+ ),
60
+ )
61
+
62
+ def encode(self, buf: Buffer) -> bytes:
63
+ """Encode the data in `buf`.
64
+
65
+ Parameters
66
+ ----------
67
+ buf : Buffer
68
+ Data to be encoded. May be any object supporting the new-style
69
+ buffer protocol.
70
+
71
+ Returns
72
+ -------
73
+ enc : bytes
74
+ Encoded and framed data as a bytestring.
75
+ """
76
+
77
+ encoded = buf
78
+ encoded_ndarray = np.asarray(
79
+ numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
80
+ )
81
+
82
+ frames = [(encoded_ndarray.dtype, encoded_ndarray.shape)]
83
+
84
+ for codec in self:
85
+ encoded = codec.encode(encoded_ndarray)
86
+ encoded_ndarray = np.asarray(
87
+ numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
88
+ )
89
+ frames.append((encoded_ndarray.dtype, encoded_ndarray.shape))
90
+
91
+ # convert the encoded array to little endian bytes
92
+ encoded_byteorder = encoded_ndarray.dtype.byteorder
93
+ encoded_byteorder = (
94
+ encoded_byteorder
95
+ if encoded_byteorder in ("<", ">")
96
+ else ("<" if (byteorder == "little") else ">")
97
+ )
98
+ if encoded_byteorder != "<":
99
+ encoded_ndarray = encoded_ndarray.byteswap()
100
+ encoded_bytes = encoded_ndarray.tobytes()
101
+
102
+ message = [varint.encode(len(frames))]
103
+
104
+ for dtype, shape in frames:
105
+ message.append(varint.encode(len(dtype.str)))
106
+ message.append(dtype.str.encode("ascii"))
107
+
108
+ message.append(varint.encode(len(shape)))
109
+ for s in shape:
110
+ message.append(varint.encode(s))
111
+
112
+ message.append(encoded_bytes)
113
+
114
+ return b"".join(message)
115
+
116
+ def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
117
+ """Decode the data in `buf`.
118
+
119
+ Parameters
120
+ ----------
121
+ buf : Buffer
122
+ Encoded data. Must be an object representing a bytestring, e.g.
123
+ [`bytes`][bytes] or a 1D array of [`np.uint8`][numpy.uint8]s etc.
124
+ out : Buffer, optional
125
+ Writeable buffer to store decoded data. N.B. if provided, this buffer must
126
+ be exactly the right size to store the decoded data.
127
+
128
+ Returns
129
+ -------
130
+ dec : Buffer
131
+ Decoded data. May be any object supporting the new-style
132
+ buffer protocol.
133
+ """
134
+
135
+ b = numcodecs.compat.ensure_bytes(buf)
136
+
137
+ b_io = BytesIO(b)
138
+
139
+ n_frames = varint.decode_stream(b_io)
140
+ assert n_frames == len(self) + 1, (
141
+ f"encoded data must contain {len(self) + 1} frames, found {n_frames}"
142
+ )
143
+
144
+ frames = []
145
+ for _ in range(n_frames):
146
+ dtype = np.dtype(b_io.read(varint.decode_stream(b_io)).decode("ascii"))
147
+ shape = tuple(
148
+ varint.decode_stream(b_io) for _ in range(varint.decode_stream(b_io))
149
+ )
150
+ frames.append((dtype, shape))
151
+
152
+ # read the decoded array from the little endian bytes
153
+ decoded = np.frombuffer(
154
+ b_io.read(np.prod(shape, dtype=int) * dtype.itemsize),
155
+ dtype=dtype.newbyteorder("<"),
156
+ count=np.prod(shape, dtype=int),
157
+ ).reshape(shape)
158
+ dtype_byteorder = dtype.byteorder
159
+ dtype_byteorder = (
160
+ dtype_byteorder
161
+ if dtype_byteorder in ("<", ">")
162
+ else ("<" if (byteorder == "little") else ">")
163
+ )
164
+ if dtype_byteorder != "<":
165
+ decoded = decoded.byteswap()
166
+
167
+ for codec, (dtype, shape) in zip(reversed(self), frames[:-1][::-1]):
168
+ decoded = (
169
+ codec.decode(
170
+ decoded,
171
+ out=np.empty(shape, dtype),
172
+ )
173
+ .view(dtype)
174
+ .reshape(shape)
175
+ )
176
+
177
+ return numcodecs.compat.ndarray_copy(decoded, out) # type: ignore
178
+
179
+ def get_config(self) -> dict:
180
+ """
181
+ Returns the configuration of the framed codec stack.
182
+
183
+ [`numcodecs.registry.get_codec(config)`][numcodecs.registry.get_codec]
184
+ can be used to reconstruct this stack from the returned config.
185
+
186
+ Returns
187
+ -------
188
+ config : dict
189
+ Configuration of the framed codec stack.
190
+ """
191
+
192
+ return dict(
193
+ id=type(self).codec_id,
194
+ codecs=tuple(codec.get_config() for codec in self),
195
+ )
196
+
197
+ @classmethod
198
+ def from_config(cls, config: dict) -> Self:
199
+ """
200
+ Instantiate the framed codec stack from a configuration [`dict`][dict].
201
+
202
+ Parameters
203
+ ----------
204
+ config : dict
205
+ Configuration of the framed codec stack.
206
+
207
+ Returns
208
+ -------
209
+ stack : FramedCodecStack
210
+ Instantiated framed codec stack.
211
+ """
212
+
213
+ return cls(*config["codecs"])
214
+
215
+ def __repr__(self) -> str:
216
+ repr = ", ".join(f"{codec!r}" for codec in self)
217
+
218
+ return f"{type(self).__name__}({repr})"
219
+
220
+ def map(self, mapper: Callable[[Codec], Codec]) -> "FramedCodecStack":
221
+ """
222
+ Apply the `mapper` to all codecs that are in this framed stack.
223
+ In the returned stack, each codec is replaced by its mapped codec.
224
+
225
+ The `mapper` should recursively apply itself to any inner codecs that
226
+ also implement the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin]
227
+ mixin.
228
+
229
+ To automatically handle the recursive application as a caller, you can
230
+ use
231
+ ```python
232
+ numcodecs_combinators.map_codec(stack, mapper)
233
+ ```
234
+ instead.
235
+
236
+ Parameters
237
+ ----------
238
+ mapper : Callable[[Codec], Codec]
239
+ The callable that should be applied to each codec to map over this
240
+ framed codec stack.
241
+
242
+ Returns
243
+ -------
244
+ mapped : FramedCodecStack
245
+ The mapped framed codec stack.
246
+ """
247
+
248
+ return FramedCodecStack(*map(mapper, self))
249
+
250
+ def __add__(self, other) -> "FramedCodecStack":
251
+ return FramedCodecStack(*tuple.__add__(self, other))
252
+
253
+ def __mul__(self, other) -> "FramedCodecStack":
254
+ return FramedCodecStack(*tuple.__mul__(self, other))
255
+
256
+ def __rmul__(self, other) -> "FramedCodecStack":
257
+ return FramedCodecStack(*tuple.__rmul__(self, other))
258
+
259
+
260
+ numcodecs.registry.register_codec(FramedCodecStack)
@@ -4,13 +4,13 @@ This module defines the [`CodecStack`][numcodecs_combinators.stack.CodecStack] c
4
4
 
5
5
  __all__ = ["CodecStack"]
6
6
 
7
- from typing import Optional, Callable
8
- from typing_extensions import Buffer, Self # MSPV 3.12
7
+ from typing import Callable, Optional
9
8
 
10
9
  import numcodecs
11
10
  import numcodecs.compat
12
11
  import numcodecs.registry
13
12
  import numpy as np
13
+ from typing_extensions import Buffer, Self # MSPV 3.12
14
14
 
15
15
  try:
16
16
  import xarray as xr
@@ -140,13 +140,17 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
140
140
  buffer protocol.
141
141
  """
142
142
 
143
- encoded = numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
143
+ encoded = np.asarray(
144
+ numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
145
+ )
144
146
  silhouettes = []
145
147
 
146
148
  for codec in self:
147
- silhouettes.append((encoded.shape, np.dtype(encoded.dtype.name)))
148
- encoded = numcodecs.compat.ensure_contiguous_ndarray_like(
149
- codec.encode((encoded)), flatten=False
149
+ silhouettes.append((encoded.shape, encoded.dtype))
150
+ encoded = np.asarray(
151
+ numcodecs.compat.ensure_contiguous_ndarray_like(
152
+ codec.encode((encoded)), flatten=False
153
+ )
150
154
  )
151
155
 
152
156
  decoded = encoded
@@ -154,7 +158,7 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
154
158
  for codec in reversed(self):
155
159
  shape, dtype = silhouettes.pop()
156
160
  out = np.empty(shape=shape, dtype=dtype)
157
- decoded = codec.decode(decoded, out).reshape(shape)
161
+ decoded = codec.decode(decoded, out).view(dtype).reshape(shape)
158
162
 
159
163
  if isinstance(decoded, type(buf)):
160
164
  return decoded
@@ -163,7 +167,14 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
163
167
 
164
168
  def encode_decode_data_array(self, da: "xr.DataArray") -> "xr.DataArray":
165
169
  """
166
- Encode, then decode the data array in `da`.
170
+ Encode, then decode each chunk (independently) in the data array `da`.
171
+
172
+ Since each chunk is encoded *independently*, this method may cause
173
+ chunk boundary artifacts. Do *not* use this method if the codec
174
+ requires access to the entire data at once or if it needs to access
175
+ a neighbourhood of points across the chunk boundary. In these cases,
176
+ it is preferable to use
177
+ `da.copy(data=stack.encode_decode(da.values))` instead.
167
178
 
168
179
  The encode-decode computation may be deferred until the
169
180
  [`compute`][xarray.DataArray.compute] method is called on the result.
@@ -0,0 +1,85 @@
1
+ import numcodecs
2
+ import numcodecs.compat
3
+ import numpy as np
4
+
5
+ import numcodecs_combinators
6
+ from numcodecs_combinators.best import PickBestCodec
7
+ from numcodecs_combinators.framed import FramedCodecStack
8
+
9
+
10
+ def assert_config_roundtrip(codec: numcodecs.abc.Codec):
11
+ config = codec.get_config()
12
+ codec2 = numcodecs.get_codec(config)
13
+ assert codec2 == codec
14
+
15
+
16
+ def test_init_config():
17
+ best = PickBestCodec()
18
+ assert len(best) == 0
19
+ assert_config_roundtrip(best)
20
+
21
+ best = PickBestCodec(dict(id="zlib", level=9))
22
+ assert len(best) == 1
23
+ assert_config_roundtrip(best)
24
+
25
+ best = PickBestCodec(dict(id="zlib", level=9), numcodecs.CRC32())
26
+ assert len(best) == 2
27
+ assert_config_roundtrip(best)
28
+
29
+
30
+ def test_encode_decode():
31
+ for best in [
32
+ PickBestCodec(),
33
+ PickBestCodec(dict(id="combinators.framed", codecs=[dict(id="zlib", level=9)])),
34
+ PickBestCodec(
35
+ FramedCodecStack(numcodecs.Zlib(level=9)),
36
+ FramedCodecStack(numcodecs.CRC32()),
37
+ ),
38
+ PickBestCodec(
39
+ FramedCodecStack(numcodecs.Zlib(level=9)),
40
+ FramedCodecStack(numcodecs.CRC32()),
41
+ FramedCodecStack(numcodecs.Zstd(level=20)),
42
+ ),
43
+ ]:
44
+ for data in [
45
+ np.zeros(shape=(0,)),
46
+ np.array(3),
47
+ np.array([97, 98, 99], dtype=np.uint8),
48
+ np.linspace(1, 100, 100).reshape(10, 10),
49
+ np.linspace(1, 100, 100).reshape(10, 10).byteswap(),
50
+ ]:
51
+ encoded = best.encode(data)
52
+ if len(best) > 0:
53
+ assert isinstance(encoded, bytes)
54
+ decoded = best.decode(encoded)
55
+ print(best)
56
+ assert np.all(decoded == data)
57
+
58
+
59
+ def test_map():
60
+ best = PickBestCodec(numcodecs.Zlib(level=9), numcodecs.CRC32())
61
+
62
+ mapped = numcodecs_combinators.map_codec(best, lambda c: c)
63
+ assert mapped == best
64
+
65
+ mapped = numcodecs_combinators.map_codec(best, lambda c: PickBestCodec(c))
66
+ assert mapped == PickBestCodec(
67
+ PickBestCodec(
68
+ PickBestCodec(numcodecs.Zlib(level=9)),
69
+ PickBestCodec(numcodecs.CRC32()),
70
+ )
71
+ )
72
+
73
+ mapped = numcodecs_combinators.map_codec(mapped, lambda c: PickBestCodec(c))
74
+ assert mapped == PickBestCodec(
75
+ PickBestCodec(
76
+ PickBestCodec(
77
+ PickBestCodec(
78
+ PickBestCodec(
79
+ PickBestCodec(PickBestCodec(numcodecs.Zlib(level=9)))
80
+ ),
81
+ PickBestCodec(PickBestCodec(PickBestCodec(numcodecs.CRC32()))),
82
+ )
83
+ )
84
+ )
85
+ )
@@ -0,0 +1,75 @@
1
+ import numcodecs
2
+ import numcodecs.compat
3
+ import numpy as np
4
+
5
+ import numcodecs_combinators
6
+ from numcodecs_combinators.framed import FramedCodecStack
7
+
8
+
9
+ def assert_config_roundtrip(codec: numcodecs.abc.Codec):
10
+ config = codec.get_config()
11
+ codec2 = numcodecs.get_codec(config)
12
+ assert codec2 == codec
13
+
14
+
15
+ def test_init_config():
16
+ stack = FramedCodecStack()
17
+ assert len(stack) == 0
18
+ assert_config_roundtrip(stack)
19
+
20
+ stack = FramedCodecStack(dict(id="zlib", level=9))
21
+ assert len(stack) == 1
22
+ assert_config_roundtrip(stack)
23
+
24
+ stack = FramedCodecStack(dict(id="zlib", level=9), numcodecs.CRC32())
25
+ assert len(stack) == 2
26
+ assert_config_roundtrip(stack)
27
+
28
+
29
+ def test_encode_decode():
30
+ for stack in [
31
+ FramedCodecStack(),
32
+ FramedCodecStack(dict(id="zlib", level=9)),
33
+ FramedCodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32()),
34
+ ]:
35
+ for data in [
36
+ b"abc",
37
+ np.array(3),
38
+ np.linspace(1, 100, 100).reshape(10, 10),
39
+ np.linspace(1, 100, 100).reshape(10, 10).byteswap(),
40
+ ]:
41
+ encoded = stack.encode(data)
42
+ assert isinstance(encoded, bytes)
43
+ decoded = stack.decode(encoded)
44
+ assert np.all(decoded == numcodecs.compat.ensure_ndarray_like(data))
45
+
46
+
47
+ def test_map():
48
+ stack = FramedCodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32())
49
+
50
+ mapped = numcodecs_combinators.map_codec(stack, lambda c: c)
51
+ assert mapped == stack
52
+
53
+ mapped = numcodecs_combinators.map_codec(stack, lambda c: FramedCodecStack(c))
54
+ assert mapped == FramedCodecStack(
55
+ FramedCodecStack(
56
+ FramedCodecStack(numcodecs.Zlib(level=9)),
57
+ FramedCodecStack(numcodecs.CRC32()),
58
+ )
59
+ )
60
+
61
+ mapped = numcodecs_combinators.map_codec(mapped, lambda c: FramedCodecStack(c))
62
+ assert mapped == FramedCodecStack(
63
+ FramedCodecStack(
64
+ FramedCodecStack(
65
+ FramedCodecStack(
66
+ FramedCodecStack(
67
+ FramedCodecStack(FramedCodecStack(numcodecs.Zlib(level=9)))
68
+ ),
69
+ FramedCodecStack(
70
+ FramedCodecStack(FramedCodecStack(numcodecs.CRC32()))
71
+ ),
72
+ )
73
+ )
74
+ )
75
+ )