PyPI - numcodecs-combinators - Versions diffs - 0.2.8__tar.gz → 0.2.10__tar.gz - Mend

numcodecs-combinators 0.2.8.tar.gz → 0.2.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: numcodecs-combinators
-Version: 0.2.8
+Version: 0.2.10
 Summary: Combinator codecs for the `numcodecs` buffer compression API
 License: Copyright (c) 2024, Juniper Tyree

numcodecs_combinators-0.2.10/_typos.toml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [default.extend-identifiers]
2	+ ChunkedNdArray = "ChunkedNdArray"

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/mkdocs.yml RENAMED Viewed

@@ -24,6 +24,7 @@ plugins:
       source_dirs:
         - nav_heading: [Documentation]
           base: src
+          ignore: ["_chunked.py"]
   - exclude:
       glob:
         - requirements.txt

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "numcodecs-combinators"
-version = "0.2.8"
+version = "0.2.10"
 description = "Combinator codecs for the `numcodecs` buffer compression API"
 readme = "README.md"
 license = { file = "LICENSE" }

numcodecs_combinators-0.2.10/src/numcodecs_combinators/_chunked.py ADDED Viewed

@@ -0,0 +1,12 @@
+import numpy as np
+class ChunkedNdArray(np.ndarray):
+    __slots__ = ()
+    def __new__(cls, array):
+        return np.asarray(array).view(cls)
+    @property
+    def chunked(self) -> bool:
+        return True

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/abc.py RENAMED Viewed

@@ -15,6 +15,8 @@ class CodecCombinatorMixin(ABC):
     Mixin class for combinators over [`Codec`][numcodecs.abc.Codec]s.
     """
+    __slots__ = ()
     @abstractmethod
     def map(self, mapper: Callable[[Codec], Codec]) -> Codec:
         """

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/best.py RENAMED Viewed

@@ -70,7 +70,9 @@ class PickBestCodec(Codec, CodecCombinatorMixin, tuple[Codec]):
         if len(self) == 0:
             return buf
-        data = numcodecs.compat.ensure_ndarray(buf)
+        data = (
+            buf if isinstance(buf, np.ndarray) else numcodecs.compat.ensure_ndarray(buf)
+        )
         best_size = np.inf
         best_index = None

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/framed.py RENAMED Viewed

@@ -16,6 +16,7 @@ import varint
 from numcodecs.abc import Codec
 from typing_extensions import Buffer, Self  # MSPV 3.12
+from ._chunked import ChunkedNdArray
 from .abc import CodecCombinatorMixin
@@ -74,6 +75,8 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
             Encoded and framed data as a bytestring.
         """
+        chunked = getattr(buf, "chunked", False)
         encoded = buf
         encoded_ndarray = np.asarray(
             numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
@@ -82,7 +85,9 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
         frames = [(encoded_ndarray.dtype, encoded_ndarray.shape)]
         for codec in self:
-            encoded = codec.encode(encoded_ndarray)
+            encoded = codec.encode(
+                ChunkedNdArray(encoded_ndarray) if chunked else encoded_ndarray
+            )
             encoded_ndarray = np.asarray(
                 numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
             )
@@ -132,6 +137,8 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
             buffer protocol.
         """
+        chunked = getattr(out, "chunked", False)
         b = numcodecs.compat.ensure_bytes(buf)
         b_io = BytesIO(b)
@@ -165,10 +172,11 @@ class FramedCodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
             decoded = decoded.byteswap()
         for codec, (dtype, shape) in zip(reversed(self), frames[:-1][::-1]):
+            empty = np.empty(shape, dtype)
             decoded = (
                 codec.decode(
                     decoded,
-                    out=np.empty(shape, dtype),
+                    out=ChunkedNdArray(empty) if chunked else empty,
                 )
                 .view(dtype)
                 .reshape(shape)

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/src/numcodecs_combinators/stack.py RENAMED Viewed

@@ -19,6 +19,7 @@ except ImportError:
 from numcodecs.abc import Codec
+from ._chunked import ChunkedNdArray
 from .abc import CodecCombinatorMixin
@@ -89,11 +90,22 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
             protocol.
         """
+        if len(self) == 0:
+            return buf
+        chunked = getattr(buf, "chunked", False)
         encoded = buf
         for codec in self:
-            encoded = codec.encode(
+            encoded_ndarray = np.asarray(
                 numcodecs.compat.ensure_contiguous_ndarray_like(encoded, flatten=False)
             )
+            encoded = codec.encode(
+                ChunkedNdArray(encoded_ndarray) if chunked else encoded_ndarray
+            )
+        if getattr(encoded, "chunked", False):
+            return np.array(encoded).view(np.ndarray)  # type: ignore
         return encoded
     def decode(self, buf: Buffer, out: Optional[Buffer] = None) -> Buffer:
@@ -140,6 +152,8 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
             buffer protocol.
         """
+        chunked = getattr(buf, "chunked", False)
         encoded = np.asarray(
             numcodecs.compat.ensure_contiguous_ndarray_like(buf, flatten=False)
         )
@@ -149,16 +163,24 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
             silhouettes.append((encoded.shape, encoded.dtype))
             encoded = np.asarray(
                 numcodecs.compat.ensure_contiguous_ndarray_like(
-                    codec.encode((encoded)), flatten=False
+                    codec.encode(ChunkedNdArray(encoded) if chunked else encoded),
+                    flatten=False,
                 )
             )
-        decoded = encoded
+        decoded = encoded.view(np.ndarray)
         for codec in reversed(self):
             shape, dtype = silhouettes.pop()
             out = np.empty(shape=shape, dtype=dtype)
-            decoded = codec.decode(decoded, out).view(dtype).reshape(shape)
+            decoded = (
+                codec.decode(decoded, ChunkedNdArray(out) if chunked else out)
+                .view(dtype)
+                .reshape(shape)
+            )
+        if getattr(decoded, "chunked", False):
+            decoded = decoded.view(np.ndarray)
         if isinstance(decoded, type(buf)):
             return decoded
@@ -167,7 +189,8 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
     def encode_decode_data_array(self, da: "xr.DataArray") -> "xr.DataArray":
         """
-        Encode, then decode each chunk (independently) in the data array `da`.
+        Encode, then decode the data array `da`. If `da` is chunked, each chunk
+        is encoded and decoded *independently*.
         Since each chunk is encoded *independently*, this method may cause
         chunk boundary artifacts. Do *not* use this method if the codec
@@ -195,6 +218,9 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
         import xarray as xr
+        if da.chunks is None:
+            return da.copy(data=self.encode_decode(da.values))  # type: ignore
         def encode_decode_data_array_single_chunk(
             da: xr.DataArray,
         ) -> xr.DataArray:
@@ -205,9 +231,11 @@ class CodecStack(Codec, CodecCombinatorMixin, tuple[Codec]):
                 return da.copy(deep=False).chunk(single_chunk)
             # eagerly compute the input chunk and encode and decode it
-            decoded = self.encode_decode(da.values)  # type: ignore
+            decoded = self.encode_decode(ChunkedNdArray(da.values))  # type: ignore
-            return da.copy(deep=False, data=decoded).chunk(single_chunk)
+            return da.copy(deep=False, data=np.array(decoded).view(np.ndarray)).chunk(
+                single_chunk
+            )
         return xr.map_blocks(encode_decode_data_array_single_chunk, da)

{numcodecs_combinators-0.2.8 → numcodecs_combinators-0.2.10}/tests/test_stack.py RENAMED Viewed

@@ -1,6 +1,7 @@
 import numcodecs
 import numpy as np
 import xarray as xr
+from numcodecs.abc import Codec
 import numcodecs_combinators
 from numcodecs_combinators.stack import CodecStack
@@ -51,6 +52,39 @@ def test_encode_decode():
     assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
+def test_chunked_encode_decode():
+    class CheckChunkedCodec(Codec):
+        __slots__ = ("is_chunked",)
+        is_chunked: bool
+        def __init__(self, is_chunked: bool):
+            self.is_chunked = is_chunked
+        def encode(self, buf):
+            assert getattr(buf, "chunked", False) == self.is_chunked
+            return buf
+        def decode(self, buf, out=None):
+            assert getattr(buf, "chunked", False) is False
+            assert getattr(out, "chunked", False) == self.is_chunked
+            return numcodecs.compat.ndarray_copy(buf, out)
+    stack = CodecStack(CheckChunkedCodec(False))
+    encoded_decoded = stack.encode_decode(np.array([1.0, 2.0, 3.0]))
+    assert np.all(encoded_decoded == np.array([1.0, 2.0, 3.0]))
+    encoded_decoded = stack.encode_decode_data_array(xr.DataArray([1.0, 2.0, 3.0]))
+    assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
+    stack = CodecStack(CheckChunkedCodec(True))
+    encoded_decoded = stack.encode_decode_data_array(
+        xr.DataArray([1.0, 2.0, 3.0]).chunk(1)
+    )
+    assert encoded_decoded.equals(xr.DataArray([1.0, 2.0, 3.0]))
 def test_map():
     stack = CodecStack(numcodecs.Zlib(level=9), numcodecs.CRC32())