numcodecs-combinators 0.2.8__tar.gz → 0.2.10__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (23) hide show
  1. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/PKG-INFO +1 -1
  2. numcodecs_combinators-0.2.10/_typos.toml +2 -0
  3. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/mkdocs.yml +1 -0
  4. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/pyproject.toml +1 -1
  5. numcodecs_combinators-0.2.10/src/numcodecs_combinators/_chunked.py +12 -0
  6. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/abc.py +2 -0
  7. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/best.py +3 -1
  8. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/framed.py +10 -2
  9. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/stack.py +35 -7
  10. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/tests/test_stack.py +34 -0
  11. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/.github/workflows/ci.yml +0 -0
  12. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/.github/workflows/publish.yml +0 -0
  13. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/.gitignore +0 -0
  14. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/.python-version +0 -0
  15. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/.readthedocs.yaml +0 -0
  16. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/LICENSE +0 -0
  17. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/README.md +0 -0
  18. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/docs/index.md +0 -0
  19. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/docs/requirements.txt +0 -0
  20. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/__init__.py +0 -0
  21. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/py.typed +0 -0
  22. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/tests/test_best.py +0 -0
  23. {numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/tests/test_framed.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: numcodecs-combinators
3
- Version: 0.2.8
3
+ Version: 0.2.10
4
4
  Summary: Combinator codecs for the `numcodecs` buffer compression API
5
5
  License: Copyright (c) 2024, Juniper Tyree
6
6
 
@@ -0,0 +1,2 @@
1
+ [default.extend-identifiers]
2
+ ChunkedNdArray = "ChunkedNdArray"
@@ -24,6 +24,7 @@ plugins:
24
24
  source_dirs:
25
25
  - nav_heading: [Documentation]
26
26
  base: src
27
+ ignore: ["_chunked.py"]
27
28
  - exclude:
28
29
  glob:
29
30
  - requirements.txt
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "numcodecs-combinators"
7
- version = "0.2.8"
7
+ version = "0.2.10"
8
8
  description = "Combinator codecs for the `numcodecs` buffer compression API"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+
3
+
4
+ class ChunkedNdArray(np.ndarray):
5
+ __slots__ = ()
6
+
7
+ def __new__(cls, array):
8
+ return np.asarray(array).view(cls)
9
+
10
+ @property
11
+ def chunked(self) -> bool:
12
+ return True
@@ -15,6 +15,8 @@ class CodecCombinatorMixin(ABC):
15
15
  Mixin class for combinators over [`Codec`][numcodecs.abc.Codec]s.
16
16
  """
17
17
 
18
+ __slots__ = ()
19
+
18
20
  @abstractmethod
19
21
  def map(self, mapper: Callable[[Codec], Codec]) -> Codec:
20
22
  """
@@ -70,7 +70,9 @@ class PickBestCodec(Codec, CodecCombinatorMixin, tuple[Codec]):
70
70
  if len(self) == 0:
71
71
  return buf
72
72
 
73
- data = numcodecs.compat.ensure_ndarray(buf)
73
+ data = (
74
+ buf if isinstance(buf, np.ndarray) else numcodecs.compat.ensure_ndarray(buf)
75
+ )
74
76
 
75
77
  best_size = np.inf
76
78
  best_index = None
@@ -16,6 +16,7 @@ import varint
16
16
  from numcodecs.abc import Codec
17
17
  from typing_extensions import Buffer, Self # MSPV 3.12
18
18
 
19
+ from ._chunked import ChunkedNdArray
19
20
  from .abc import CodecCombinatorMixin
20
21
 
21
22
 
@@ -74,6 +75,8 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
74
75
  Encoded and framed data as a bytestring.
75
76
  """
76
77
 
78
+ chunked = getattr(buf, "chunked", False)
79
+
77
80
  encoded = buf
78
81
  encoded_ndarray = np.asarray(
79
82
  numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
@@ -82,7 +85,9 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
82
85
  frames = [(encoded_ndarray.dtype, encoded_ndarray.shape)]
83
86
 
84
87
  for codec in self:
85
- encoded = codec.encode(encoded_ndarray)
88
+ encoded = codec.encode(
89
+ ChunkedNdArray(encoded_ndarray) if chunked else encoded_ndarray
90
+ )
86
91
  encoded_ndarray = np.asarray(
87
92
  numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
88
93
  )
@@ -132,6 +137,8 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
132
137
  buffer protocol.
133
138
  """
134
139
 
140
+ chunked = getattr(out, "chunked", False)
141
+
135
142
  b = numcodecs.compat.ensure_bytes(buf)
136
143
 
137
144
  b_io = BytesIO(b)
@@ -165,10 +172,11 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
165
172
  decoded = decoded.byteswap()
166
173
 
167
174
  for codec, (dtype, shape) in zip(reversed(self), frames[:-1][::-1]):
175
+ empty = np.empty(shape, dtype)
168
176
  decoded = (
169
177
  codec.decode(
170
178
  decoded,
171
- out=np.empty(shape, dtype),
179
+ out=ChunkedNdArray(empty) if chunked else empty,
172
180
  )
173
181
  .view(dtype)
174
182
  .reshape(shape)
@@ -19,6 +19,7 @@ except ImportError:
19
19
 
20
20
  from numcodecs.abc import Codec
21
21
 
22
+ from ._chunked import ChunkedNdArray
22
23
  from .abc import CodecCombinatorMixin
23
24
 
24
25
 
@@ -89,11 +90,22 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
89
90
  protocol.
90
91
  """
91
92
 
93
+ if len(self) == 0:
94
+ return buf
95
+
96
+ chunked = getattr(buf, "chunked", False)
97
+
92
98
  encoded = buf
93
99
  for codec in self:
94
- encoded = codec.encode(
100
+ encoded_ndarray = np.asarray(
95
101
  numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
96
102
  )
103
+ encoded = codec.encode(
104
+ ChunkedNdArray(encoded_ndarray) if chunked else encoded_ndarray
105
+ )
106
+
107
+ if getattr(encoded, "chunked", False):
108
+ return np.array(encoded).view(np.ndarray) # type: ignore
97
109
  return encoded
98
110
 
99
111
  def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
@@ -140,6 +152,8 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
140
152
  buffer protocol.
141
153
  """
142
154
 
155
+ chunked = getattr(buf, "chunked", False)
156
+
143
157
  encoded = np.asarray(
144
158
  numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
145
159
  )
@@ -149,16 +163,24 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
149
163
  silhouettes.append((encoded.shape, encoded.dtype))
150
164
  encoded = np.asarray(
151
165
  numcodecs.compat.ensure_contiguous_ndarray_like(
152
- codec.encode((encoded)), flatten=False
166
+ codec.encode(ChunkedNdArray(encoded) if chunked else encoded),
167
+ flatten=False,
153
168
  )
154
169
  )
155
170
 
156
- decoded = encoded
171
+ decoded = encoded.view(np.ndarray)
157
172
 
158
173
  for codec in reversed(self):
159
174
  shape, dtype = silhouettes.pop()
160
175
  out = np.empty(shape=shape, dtype=dtype)
161
- decoded = codec.decode(decoded, out).view(dtype).reshape(shape)
176
+ decoded = (
177
+ codec.decode(decoded, ChunkedNdArray(out) if chunked else out)
178
+ .view(dtype)
179
+ .reshape(shape)
180
+ )
181
+
182
+ if getattr(decoded, "chunked", False):
183
+ decoded = decoded.view(np.ndarray)
162
184
 
163
185
  if isinstance(decoded, type(buf)):
164
186
  return decoded
@@ -167,7 +189,8 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
167
189
 
168
190
  def encode_decode_data_array(self, da: "xr.DataArray") -> "xr.DataArray":
169
191
  """
170
- Encode, then decode each chunk (independently) in the data array `da`.
192
+ Encode, then decode the data array `da`. If `da` is chunked, each chunk
193
+ is encoded and decoded *independently*.
171
194
 
172
195
  Since each chunk is encoded *independently*, this method may cause
173
196
  chunk boundary artifacts. Do *not* use this method if the codec
@@ -195,6 +218,9 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
195
218
 
196
219
  import xarray as xr
197
220
 
221
+ if da.chunks is None:
222
+ return da.copy(data=self.encode_decode(da.values)) # type: ignore
223
+
198
224
  def encode_decode_data_array_single_chunk(
199
225
  da: xr.DataArray,
200
226
  ) -> xr.DataArray:
@@ -205,9 +231,11 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
205
231
  return da.copy(deep=False).chunk(single_chunk)
206
232
 
207
233
  # eagerly compute the input chunk and encode and decode it
208
- decoded = self.encode_decode(da.values) # type: ignore
234
+ decoded = self.encode_decode(ChunkedNdArray(da.values)) # type: ignore
209
235
 
210
- return da.copy(deep=False, data=decoded).chunk(single_chunk)
236
+ return da.copy(deep=False, data=np.array(decoded).view(np.ndarray)).chunk(
237
+ single_chunk
238
+ )
211
239
 
212
240
  return xr.map_blocks(encode_decode_data_array_single_chunk, da)
213
241
 
@@ -1,6 +1,7 @@
1
1
  import numcodecs
2
2
  import numpy as np
3
3
  import xarray as xr
4
+ from numcodecs.abc import Codec
4
5
 
5
6
  import numcodecs_combinators
6
7
  from numcodecs_combinators.stack import CodecStack
@@ -51,6 +52,39 @@ def test_encode_decode():
51
52
  assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
52
53
 
53
54
 
55
+ def test_chunked_encode_decode():
56
+ class CheckChunkedCodec(Codec):
57
+ __slots__ = ("is_chunked",)
58
+ is_chunked: bool
59
+
60
+ def __init__(self, is_chunked: bool):
61
+ self.is_chunked = is_chunked
62
+
63
+ def encode(self, buf):
64
+ assert getattr(buf, "chunked", False) == self.is_chunked
65
+ return buf
66
+
67
+ def decode(self, buf, out=None):
68
+ assert getattr(buf, "chunked", False) is False
69
+ assert getattr(out, "chunked", False) == self.is_chunked
70
+ return numcodecs.compat.ndarray_copy(buf, out)
71
+
72
+ stack = CodecStack(CheckChunkedCodec(False))
73
+
74
+ encoded_decoded = stack.encode_decode(np.array([1.0, 2.0, 3.0]))
75
+ assert np.all(encoded_decoded == np.array([1.0, 2.0, 3.0]))
76
+
77
+ encoded_decoded = stack.encode_decode_data_array(xr.DataArray([1.0, 2.0, 3.0]))
78
+ assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
79
+
80
+ stack = CodecStack(CheckChunkedCodec(True))
81
+
82
+ encoded_decoded = stack.encode_decode_data_array(
83
+ xr.DataArray([1.0, 2.0, 3.0]).chunk(1)
84
+ )
85
+ assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
86
+
87
+
54
88
  def test_map():
55
89
  stack = CodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32())
56
90