numcodecs-combinators 0.2.6__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19) hide show
  1. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/PKG-INFO +3 -1
  2. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/README.md +1 -0
  3. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/docs/index.md +1 -0
  4. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/mkdocs.yml +4 -0
  5. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/pyproject.toml +11 -2
  6. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/src/numcodecs_combinators/__init__.py +6 -1
  7. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/src/numcodecs_combinators/abc.py +1 -2
  8. numcodecs_combinators-0.2.7/src/numcodecs_combinators/framed.py +260 -0
  9. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/src/numcodecs_combinators/stack.py +11 -7
  10. numcodecs_combinators-0.2.7/tests/test_framed.py +75 -0
  11. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/.github/workflows/ci.yml +0 -0
  12. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/.github/workflows/publish.yml +0 -0
  13. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/.gitignore +0 -0
  14. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/.python-version +0 -0
  15. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/.readthedocs.yaml +0 -0
  16. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/LICENSE +0 -0
  17. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/docs/requirements.txt +0 -0
  18. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/src/numcodecs_combinators/py.typed +0 -0
  19. {numcodecs_combinators-0.2.6 → numcodecs_combinators-0.2.7}/tests/test_stack.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numcodecs-combinators
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Combinator codecs for the `numcodecs` buffer compression API
5
5
  License: Copyright (c) 2024, Juniper Tyree
6
6
 
@@ -382,6 +382,7 @@ Requires-Python: >=3.10
382
382
  Requires-Dist: numcodecs<0.17,>=0.13.0
383
383
  Requires-Dist: numpy~=2.0
384
384
  Requires-Dist: typing-extensions~=4.6
385
+ Requires-Dist: varint~=1.0
385
386
  Provides-Extra: xarray
386
387
  Requires-Dist: dask>=2024.6; extra == 'xarray'
387
388
  Requires-Dist: xarray>=2024.06; extra == 'xarray'
@@ -400,6 +401,7 @@ Combinator codecs for the [`numcodecs`] buffer compression API.
400
401
  The following combinators, implementing the `CodecCombinatorMixin` are provided:
401
402
 
402
403
  - `CodecStack`: a stack of codecs
404
+ - `FramedCodecStack`: a stack of codecs that is framed with array data type and shape information
403
405
 
404
406
  [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
405
407
 
@@ -11,6 +11,7 @@ Combinator codecs for the [`numcodecs`] buffer compression API.
11
11
  The following combinators, implementing the `CodecCombinatorMixin` are provided:
12
12
 
13
13
  - `CodecStack`: a stack of codecs
14
+ - `FramedCodecStack`: a stack of codecs that is framed with array data type and shape information
14
15
 
15
16
  [`numcodecs`]: https://numcodecs.readthedocs.io/en/stable/
16
17
 
@@ -11,6 +11,7 @@ Combinator codecs for the [`numcodecs`][numcodecs] buffer compression API.
11
11
  The following combinators, implementing the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin] are provided:
12
12
 
13
13
  - [`CodecStack`][numcodecs_combinators.stack.CodecStack]: a stack of codecs
14
+ - [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]: a stack of codecs that is framed with array data type and shape information
14
15
 
15
16
  ## Funding
16
17
 
@@ -54,5 +54,9 @@ plugins:
54
54
  modules: true
55
55
  inventories:
56
56
  - https://docs.python.org/3.12/objects.inv
57
+ - https://numpy.org/doc/2.2/objects.inv
57
58
  - https://numcodecs.readthedocs.io/en/v0.15.0/objects.inv
58
59
  - https://docs.xarray.dev/en/v2025.01.0/objects.inv
60
+
61
+ watch:
62
+ - src
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "numcodecs-combinators"
7
- version = "0.2.6"
7
+ version = "0.2.7"
8
8
  description = "Combinator codecs for the `numcodecs` buffer compression API"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -13,6 +13,7 @@ dependencies = [
13
13
  "numcodecs>=0.13.0,<0.17",
14
14
  "numpy~=2.0",
15
15
  "typing-extensions~=4.6",
16
+ "varint~=1.0",
16
17
  ]
17
18
  optional-dependencies.xarray = [ "xarray>=2024.06", "dask>=2024.6" ]
18
19
 
@@ -21,6 +22,7 @@ dev = ["mypy~=1.14", "pytest~=8.3"]
21
22
 
22
23
  [project.entry-points."numcodecs.codecs"]
23
24
  "combinators.stack" = "numcodecs_combinators.stack:CodecStack"
25
+ "combinators.framed" = "numcodecs_combinators.framed:FramedCodecStack"
24
26
 
25
27
  [tool.setuptools.packages.find]
26
28
  where = ["src"]
@@ -28,6 +30,13 @@ where = ["src"]
28
30
  [tool.setuptools.package-data]
29
31
  "numcodecs_combinators" = ["py.typed"]
30
32
 
33
+ [tool.ruff.lint]
34
+ select = ["E4", "E7", "E9", "F", "I"]
35
+
36
+ [tool.pytest.ini_options]
37
+ addopts = ["--import-mode=importlib"]
38
+ xfail_strict = true
39
+
31
40
  [[tool.mypy.overrides]]
32
- module = ["numcodecs.*"]
41
+ module = ["numcodecs.*", "varint.*"]
33
42
  follow_untyped_imports = true
@@ -1,9 +1,13 @@
1
1
  """
2
2
  Combinator codecs for the [`numcodecs`][numcodecs] buffer compression API.
3
3
 
4
- The following combinators, implementing the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin] are provided:
4
+ The following combinators, implementing the
5
+ [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin] are
6
+ provided:
5
7
 
6
8
  - [`CodecStack`][numcodecs_combinators.stack.CodecStack]: a stack of codecs
9
+ - [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]: a stack
10
+ of codecs that is framed with array data type and shape information
7
11
  """
8
12
 
9
13
  __all__ = ["map_codec"]
@@ -14,6 +18,7 @@ from typing import Callable
14
18
  from numcodecs.abc import Codec
15
19
 
16
20
  from . import abc as abc
21
+ from . import framed as framed
17
22
  from . import stack as stack
18
23
 
19
24
 
@@ -4,12 +4,11 @@ This module defines the [`CodecCombinatorMixin`][numcodecs_combinators.abc.Codec
4
4
 
5
5
  __all__ = ["CodecCombinatorMixin"]
6
6
 
7
+ from abc import ABC, abstractmethod
7
8
  from typing import Callable
8
9
 
9
10
  from numcodecs.abc import Codec
10
11
 
11
- from abc import ABC, abstractmethod
12
-
13
12
 
14
13
  class CodecCombinatorMixin(ABC):
15
14
  """
@@ -0,0 +1,260 @@
1
+ """
2
+ This module defines the [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack] class, which exposes a framed stack of codecs as a combined codec.
3
+ """
4
+
5
+ __all__ = ["FramedCodecStack"]
6
+
7
+ from io import BytesIO
8
+ from sys import byteorder
9
+ from typing import Callable, Optional
10
+
11
+ import numcodecs
12
+ import numcodecs.compat
13
+ import numcodecs.registry
14
+ import numpy as np
15
+ import varint
16
+ from numcodecs.abc import Codec
17
+ from typing_extensions import Buffer, Self # MSPV 3.12
18
+
19
+ from .abc import CodecCombinatorMixin
20
+
21
+
22
class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
    """
    A framed stack of codecs, which makes up a combined codec.

    On encoding, the result of applying the codecs from left to right to encode
    is framed s.t. the data types and shapes of all arrays (input,
    intermediary, encoded) are stored as part of the encoding, which is output
    as a bytestring.

    On decoding, this framing information is used to apply the codecs from
    right to left to decode into known output data types and shapes.

    Therefore, the [`FramedCodecStack`][numcodecs_combinators.framed.FramedCodecStack]
    can be used to combine codecs which require knowing the output data type
    and shape during decoding. It can also be used to encode arrays into
    bytestrings.

    Unlike the [`CodecStack`][numcodecs_combinators.stack.CodecStack], this
    class does *not* provide an additional `encode_decode(buf)` method, since
    it is equivalent to `framed.decode(framed.encode(buf))` due to the framing.
    """

    __slots__ = ()

    codec_id: str = "combinators.framed"  # type: ignore

    def __init__(self, *args: dict | Codec):
        # Intentionally empty: the stack is a tuple, so its items are fixed in
        # `__new__`. This override only accepts (and ignores) the same
        # positional arguments that `__new__` receives.
        pass

    def __new__(cls, *args: dict | Codec) -> Self:
        # Codec instances are used as-is; anything else is treated as a codec
        # configuration and resolved through the numcodecs registry.
        return super(FramedCodecStack, cls).__new__(
            cls,
            tuple(
                codec
                if isinstance(codec, Codec)
                else numcodecs.registry.get_codec(codec)
                for codec in args
            ),
        )

    def encode(self, buf: Buffer) -> bytes:
        """Encode the data in `buf`.

        Parameters
        ----------
        buf : Buffer
            Data to be encoded. May be any object supporting the new-style
            buffer protocol.

        Returns
        -------
        enc : bytes
            Encoded and framed data as a bytestring.
        """

        encoded_ndarray = np.asarray(
            numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
        )

        # one (dtype, shape) frame for the input and one per codec output
        frames = [(encoded_ndarray.dtype, encoded_ndarray.shape)]

        for codec in self:
            encoded = codec.encode(encoded_ndarray)
            encoded_ndarray = np.asarray(
                numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
            )
            frames.append((encoded_ndarray.dtype, encoded_ndarray.shape))

        # convert the encoded array to little endian bytes; byte orders other
        # than "<" and ">" are resolved to the byte order of this machine
        encoded_byteorder = encoded_ndarray.dtype.byteorder
        if encoded_byteorder not in ("<", ">"):
            encoded_byteorder = "<" if (byteorder == "little") else ">"
        if encoded_byteorder != "<":
            encoded_ndarray = encoded_ndarray.byteswap()
        encoded_bytes = encoded_ndarray.tobytes()

        # message layout:
        #   varint(number of frames)
        #   per frame: varint(len(dtype.str)), dtype.str as ASCII,
        #              varint(number of dimensions), varint(size) per dimension
        #   payload: the final encoded array as little endian bytes
        message = [varint.encode(len(frames))]

        for dtype, shape in frames:
            message.append(varint.encode(len(dtype.str)))
            message.append(dtype.str.encode("ascii"))

            message.append(varint.encode(len(shape)))
            message.extend(varint.encode(s) for s in shape)

        message.append(encoded_bytes)

        return b"".join(message)

    def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
        """Decode the data in `buf`.

        Parameters
        ----------
        buf : Buffer
            Encoded data. Must be an object representing a bytestring, e.g.
            [`bytes`][bytes] or a 1D array of [`np.uint8`][numpy.uint8]s etc.
        out : Buffer, optional
            Writeable buffer to store decoded data. N.B. if provided, this buffer must
            be exactly the right size to store the decoded data.

        Returns
        -------
        dec : Buffer
            Decoded data. May be any object supporting the new-style
            buffer protocol.

        Raises
        ------
        ValueError
            If the number of frames stored in `buf` does not match the number
            of codecs in this stack plus one.
        """

        b_io = BytesIO(numcodecs.compat.ensure_bytes(buf))

        n_frames = varint.decode_stream(b_io)
        # explicit check instead of `assert`, which is stripped under `-O`
        if n_frames != len(self) + 1:
            raise ValueError(
                f"encoded data must contain {len(self) + 1} frames, found {n_frames}"
            )

        frames = []
        for _ in range(n_frames):
            dtype = np.dtype(b_io.read(varint.decode_stream(b_io)).decode("ascii"))
            shape = tuple(
                varint.decode_stream(b_io) for _ in range(varint.decode_stream(b_io))
            )
            frames.append((dtype, shape))

        # the last frame describes the payload that follows the frame headers
        dtype, shape = frames[-1]
        count = int(np.prod(shape, dtype=int))

        # read the decoded array from the little endian bytes, then convert it
        # to the recorded dtype so that both the values and the dtype
        # (including its byte order) match the array that was encoded; this is
        # a no-op when the recorded dtype already is little endian
        decoded = (
            np.frombuffer(
                b_io.read(count * dtype.itemsize),
                dtype=dtype.newbyteorder("<"),
                count=count,
            )
            .reshape(shape)
            .astype(dtype, copy=False)
        )

        # undo the codecs from right to left, each into the dtype and shape
        # that was recorded for its input during encoding
        for codec, (dtype, shape) in zip(reversed(self), frames[:-1][::-1]):
            decoded = (
                codec.decode(
                    decoded,
                    out=np.empty(shape, dtype),
                )
                .view(dtype)
                .reshape(shape)
            )

        return numcodecs.compat.ndarray_copy(decoded, out)  # type: ignore

    def get_config(self) -> dict:
        """
        Returns the configuration of the framed codec stack.

        [`numcodecs.registry.get_codec(config)`][numcodecs.registry.get_codec]
        can be used to reconstruct this stack from the returned config.

        Returns
        -------
        config : dict
            Configuration of the framed codec stack.
        """

        return dict(
            id=type(self).codec_id,
            codecs=tuple(codec.get_config() for codec in self),
        )

    @classmethod
    def from_config(cls, config: dict) -> Self:
        """
        Instantiate the framed codec stack from a configuration [`dict`][dict].

        Parameters
        ----------
        config : dict
            Configuration of the framed codec stack.

        Returns
        -------
        stack : FramedCodecStack
            Instantiated framed codec stack.
        """

        return cls(*config["codecs"])

    def __repr__(self) -> str:
        codecs_repr = ", ".join(f"{codec!r}" for codec in self)

        return f"{type(self).__name__}({codecs_repr})"

    def map(self, mapper: Callable[[Codec], Codec]) -> "FramedCodecStack":
        """
        Apply the `mapper` to all codecs that are in this framed stack.
        In the returned stack, each codec is replaced by its mapped codec.

        The `mapper` should recursively apply itself to any inner codecs that
        also implement the [`CodecCombinatorMixin`][numcodecs_combinators.abc.CodecCombinatorMixin]
        mixin.

        To automatically handle the recursive application as a caller, you can
        use
        ```python
        numcodecs_combinators.map_codec(stack, mapper)
        ```
        instead.

        Parameters
        ----------
        mapper : Callable[[Codec], Codec]
            The callable that should be applied to each codec to map over this
            framed codec stack.

        Returns
        -------
        mapped : FramedCodecStack
            The mapped framed codec stack.
        """

        return FramedCodecStack(*map(mapper, self))

    # The tuple operators are wrapped so that concatenation and repetition
    # produce a FramedCodecStack instead of a plain tuple.

    def __add__(self, other) -> "FramedCodecStack":
        return FramedCodecStack(*tuple.__add__(self, other))

    def __mul__(self, other) -> "FramedCodecStack":
        return FramedCodecStack(*tuple.__mul__(self, other))

    def __rmul__(self, other) -> "FramedCodecStack":
        return FramedCodecStack(*tuple.__rmul__(self, other))


numcodecs.registry.register_codec(FramedCodecStack)
@@ -4,13 +4,13 @@ This module defines the [`CodecStack`][numcodecs_combinators.stack.CodecStack] c
4
4
 
5
5
  __all__ = ["CodecStack"]
6
6
 
7
- from typing import Optional, Callable
8
- from typing_extensions import Buffer, Self # MSPV 3.12
7
+ from typing import Callable, Optional
9
8
 
10
9
  import numcodecs
11
10
  import numcodecs.compat
12
11
  import numcodecs.registry
13
12
  import numpy as np
13
+ from typing_extensions import Buffer, Self # MSPV 3.12
14
14
 
15
15
  try:
16
16
  import xarray as xr
@@ -140,13 +140,17 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
140
140
  buffer protocol.
141
141
  """
142
142
 
143
- encoded = numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
143
+ encoded = np.asarray(
144
+ numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
145
+ )
144
146
  silhouettes = []
145
147
 
146
148
  for codec in self:
147
- silhouettes.append((encoded.shape, np.dtype(encoded.dtype.name)))
148
- encoded = numcodecs.compat.ensure_contiguous_ndarray_like(
149
- codec.encode((encoded)), flatten=False
149
+ silhouettes.append((encoded.shape, encoded.dtype))
150
+ encoded = np.asarray(
151
+ numcodecs.compat.ensure_contiguous_ndarray_like(
152
+ codec.encode((encoded)), flatten=False
153
+ )
150
154
  )
151
155
 
152
156
  decoded = encoded
@@ -154,7 +158,7 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
154
158
  for codec in reversed(self):
155
159
  shape, dtype = silhouettes.pop()
156
160
  out = np.empty(shape=shape, dtype=dtype)
157
- decoded = codec.decode(decoded, out).reshape(shape)
161
+ decoded = codec.decode(decoded, out).view(dtype).reshape(shape)
158
162
 
159
163
  if isinstance(decoded, type(buf)):
160
164
  return decoded
@@ -0,0 +1,75 @@
1
+ import numcodecs
2
+ import numcodecs.compat
3
+ import numpy as np
4
+
5
+ import numcodecs_combinators
6
+ from numcodecs_combinators.framed import FramedCodecStack
7
+
8
+
9
def assert_config_roundtrip(codec: numcodecs.abc.Codec):
    """Assert that rebuilding `codec` from its own config yields an equal codec."""
    rebuilt = numcodecs.get_codec(codec.get_config())
    assert rebuilt == codec
13
+
14
+
15
def test_init_config():
    """Stacks of zero, one and two codecs have the right length and config."""
    # (constructor arguments, expected number of codecs in the stack)
    cases = [
        ((), 0),
        ((dict(id="zlib", level=9),), 1),
        ((dict(id="zlib", level=9), numcodecs.CRC32()), 2),
    ]

    for codecs, expected_len in cases:
        stack = FramedCodecStack(*codecs)
        assert len(stack) == expected_len
        assert_config_roundtrip(stack)
27
+
28
+
29
def test_encode_decode():
    """Encoding yields bytes and decoding restores the input, for every stack."""
    stacks = (
        FramedCodecStack(),
        FramedCodecStack(dict(id="zlib", level=9)),
        FramedCodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32()),
    )

    for stack in stacks:
        # a bytestring, a 0-d array, a 2-d array and its byte-swapped copy
        samples = (
            b"abc",
            np.array(3),
            np.linspace(1, 100, 100).reshape(10, 10),
            np.linspace(1, 100, 100).reshape(10, 10).byteswap(),
        )

        for sample in samples:
            framed = stack.encode(sample)
            assert isinstance(framed, bytes)

            restored = stack.decode(framed)
            expected = numcodecs.compat.ensure_ndarray_like(sample)
            assert np.all(restored == expected)
45
+
46
+
47
def test_map():
    """`map_codec` applies the mapper to the inner codecs and to each stack."""

    def wrap(codec):
        return FramedCodecStack(codec)

    stack = FramedCodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32())

    # the identity mapper leaves the stack unchanged
    mapped = numcodecs_combinators.map_codec(stack, lambda c: c)
    assert mapped == stack

    # one pass: each inner codec is wrapped, and so is the stack itself
    mapped = numcodecs_combinators.map_codec(stack, wrap)
    expected_once = wrap(
        FramedCodecStack(
            wrap(numcodecs.Zlib(level=9)),
            wrap(numcodecs.CRC32()),
        )
    )
    assert mapped == expected_once

    # second pass over the already wrapped result
    mapped = numcodecs_combinators.map_codec(mapped, wrap)
    two_codec_level = FramedCodecStack(
        wrap(wrap(wrap(numcodecs.Zlib(level=9)))),
        wrap(wrap(wrap(numcodecs.CRC32()))),
    )
    expected_twice = wrap(wrap(wrap(two_codec_level)))
    assert mapped == expected_twice