PyPI - numcodecs - Versions diffs - 0.14.1__cp311-cp311-win_amd64.whl → 0.15.1__cp311-cp311-win_amd64.whl - Mend

numcodecs 0.14.1__cp311-cp311-win_amd64.whl → 0.15.1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of numcodecs might be problematic. Click here for more details.

Files changed (44)

numcodecs/__init__.py +43 -46
numcodecs/_shuffle.cp311-win_amd64.pyd +0 -0
numcodecs/abc.py +1 -1
numcodecs/astype.py +2 -6
numcodecs/base64.py +1 -2
numcodecs/blosc.cp311-win_amd64.pyd +0 -0
numcodecs/categorize.py +7 -10
numcodecs/checksum32.py +1 -1
numcodecs/compat_ext.cp311-win_amd64.pyd +0 -0
numcodecs/delta.py +3 -10
numcodecs/errors.py +26 -0
numcodecs/fixedscaleoffset.py +8 -10
numcodecs/fletcher32.cp311-win_amd64.pyd +0 -0
numcodecs/gzip.py +1 -3
numcodecs/jenkins.cp311-win_amd64.pyd +0 -0
numcodecs/json.py +12 -12
numcodecs/lz4.cp311-win_amd64.pyd +0 -0
numcodecs/lzma.py +1 -1
numcodecs/msgpacks.py +6 -6
numcodecs/ndarray_like.py +2 -2
numcodecs/pcodec.py +59 -29
numcodecs/pickles.py +1 -1
numcodecs/quantize.py +9 -11
numcodecs/registry.py +3 -2
numcodecs/tests/common.py +3 -4
numcodecs/tests/test_blosc.py +9 -11
numcodecs/tests/test_lzma.py +1 -1
numcodecs/tests/test_pcodec.py +18 -8
numcodecs/tests/test_registry.py +4 -3
numcodecs/tests/test_shuffle.py +2 -4
numcodecs/tests/test_vlen_bytes.py +3 -0
numcodecs/tests/test_zarr3.py +73 -40
numcodecs/version.py +2 -2
numcodecs/vlen.cp311-win_amd64.pyd +0 -0
numcodecs/zarr3.py +49 -27
numcodecs/zfpy.py +1 -1
numcodecs/zstd.cp311-win_amd64.pyd +0 -0
{numcodecs-0.14.1.dist-info → numcodecs-0.15.1.dist-info}/METADATA +4 -4
numcodecs-0.15.1.dist-info/RECORD +79 -0
{numcodecs-0.14.1.dist-info → numcodecs-0.15.1.dist-info}/WHEEL +1 -1
numcodecs-0.14.1.dist-info/RECORD +0 -78
{numcodecs-0.14.1.dist-info → numcodecs-0.15.1.dist-info}/LICENSE.txt +0 -0
{numcodecs-0.14.1.dist-info → numcodecs-0.15.1.dist-info}/entry_points.txt +0 -0
{numcodecs-0.14.1.dist-info → numcodecs-0.15.1.dist-info}/top_level.txt +0 -0

numcodecs/__init__.py CHANGED Viewed

@@ -36,41 +36,32 @@ from numcodecs.bz2 import BZ2
 register_codec(BZ2)
-with suppress(ImportError):
-    from numcodecs.lzma import LZMA
+from numcodecs.lzma import LZMA
-    register_codec(LZMA)
+register_codec(LZMA)
-with suppress(ImportError):
-    from numcodecs import blosc
-    from numcodecs.blosc import Blosc
-    register_codec(Blosc)
-    # initialize blosc
-    try:
-        ncores = multiprocessing.cpu_count()
-    except OSError:  # pragma: no cover
-        ncores = 1
-    blosc.init()
-    blosc.set_nthreads(min(8, ncores))
-    atexit.register(blosc.destroy)
+from numcodecs import blosc
+from numcodecs.blosc import Blosc
-with suppress(ImportError):
-    from numcodecs import zstd as zstd
-    from numcodecs.zstd import Zstd
+register_codec(Blosc)
+# initialize blosc
+try:
+    ncores = multiprocessing.cpu_count()
+except OSError:  # pragma: no cover
+    ncores = 1
+blosc._init()
+blosc.set_nthreads(min(8, ncores))
+atexit.register(blosc.destroy)
-    register_codec(Zstd)
+from numcodecs import zstd as zstd
+from numcodecs.zstd import Zstd
-with suppress(ImportError):
-    from numcodecs import lz4 as lz4
-    from numcodecs.lz4 import LZ4
+register_codec(Zstd)
-    register_codec(LZ4)
+from numcodecs import lz4 as lz4
+from numcodecs.lz4 import LZ4
-with suppress(ImportError):
-    from numcodecs.zfpy import ZFPY
-    register_codec(ZFPY)
+register_codec(LZ4)
 from numcodecs.astype import AsType
@@ -112,38 +103,44 @@ from numcodecs.bitround import BitRound
 register_codec(BitRound)
-with suppress(ImportError):
-    from numcodecs.msgpacks import MsgPack
-    register_codec(MsgPack)
 from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3
 register_codec(CRC32)
 register_codec(Adler32)
 register_codec(JenkinsLookup3)
-with suppress(ImportError):
-    from numcodecs.checksum32 import CRC32C
-    register_codec(CRC32C)
 from numcodecs.json import JSON
 register_codec(JSON)
-with suppress(ImportError):
-    from numcodecs import vlen as vlen
-    from numcodecs.vlen import VLenArray, VLenBytes, VLenUTF8
+from numcodecs import vlen as vlen
+from numcodecs.vlen import VLenArray, VLenBytes, VLenUTF8
-    register_codec(VLenUTF8)
-    register_codec(VLenBytes)
-    register_codec(VLenArray)
+register_codec(VLenUTF8)
+register_codec(VLenBytes)
+register_codec(VLenArray)
 from numcodecs.fletcher32 import Fletcher32
 register_codec(Fletcher32)
-from numcodecs.pcodec import PCodec
+# Optional depenedencies
+with suppress(ImportError):
+    from numcodecs.zfpy import ZFPY
+    register_codec(ZFPY)
+with suppress(ImportError):
+    from numcodecs.msgpacks import MsgPack
+    register_codec(MsgPack)
+with suppress(ImportError):
+    from numcodecs.checksum32 import CRC32C
+    register_codec(CRC32C)
+with suppress(ImportError):
+    from numcodecs.pcodec import PCodec
-register_codec(PCodec)
+    register_codec(PCodec)

numcodecs/_shuffle.cp311-win_amd64.pyd CHANGED Viewed

Binary file

numcodecs/abc.py CHANGED Viewed

@@ -84,7 +84,7 @@ class Codec(ABC):
         # override in sub-class if need special encoding of config values
         # setup config object
-        config = dict(id=self.codec_id)
+        config = {'id': self.codec_id}
         # by default, assume all non-private members are configuration
         # parameters - override this in sub-class if not the case

numcodecs/astype.py CHANGED Viewed

@@ -49,9 +49,7 @@ class AsType(Codec):
         arr = ensure_ndarray(buf).view(self.decode_dtype)
         # convert and copy
-        enc = arr.astype(self.encode_dtype)
-        return enc
+        return arr.astype(self.encode_dtype)
     def decode(self, buf, out=None):
         # normalise input
@@ -61,9 +59,7 @@ class AsType(Codec):
         dec = enc.astype(self.decode_dtype)
         # handle output
-        out = ndarray_copy(dec, out)
-        return out
+        return ndarray_copy(dec, out)
     def get_config(self):
         return {

numcodecs/base64.py CHANGED Viewed

@@ -13,8 +13,7 @@ class Base64(Codec):
         # normalise inputs
         buf = ensure_contiguous_ndarray(buf)
         # do compression
-        compressed = _base64.standard_b64encode(buf)
-        return compressed
+        return _base64.standard_b64encode(buf)
     def decode(self, buf, out=None):
         # normalise inputs

numcodecs/blosc.cp311-win_amd64.pyd CHANGED Viewed

Binary file

numcodecs/categorize.py CHANGED Viewed

@@ -80,18 +80,15 @@ class Categorize(Codec):
             dec[enc == (i + 1)] = label
         # handle output
-        dec = ndarray_copy(dec, out)
-        return dec
+        return ndarray_copy(dec, out)
     def get_config(self):
-        config = dict(
-            id=self.codec_id,
-            labels=self.labels,
-            dtype=self.dtype.str,
-            astype=self.astype.str,
-        )
-        return config
+        return {
+            'id': self.codec_id,
+            'labels': self.labels,
+            'dtype': self.dtype.str,
+            'astype': self.astype.str,
+        }
     def __repr__(self):
         # make sure labels part is not too long

numcodecs/checksum32.py CHANGED Viewed

@@ -13,7 +13,7 @@ from .jenkins import jenkins_lookup3
 _crc32c: Optional[ModuleType] = None
 with suppress(ImportError):
-    import crc32c as _crc32c  # type: ignore[no-redef]
+    import crc32c as _crc32c  # type: ignore[no-redef, unused-ignore]
 if TYPE_CHECKING:  # pragma: no cover
     from typing_extensions import Buffer

numcodecs/compat_ext.cp311-win_amd64.pyd CHANGED Viewed

Binary file

numcodecs/delta.py CHANGED Viewed

@@ -63,12 +63,7 @@ class Delta(Codec):
         enc[0] = arr[0]
         # compute differences
-        # using np.subtract for in-place operations
-        if arr.dtype == bool:
-            np.not_equal(arr[1:], arr[:-1], out=enc[1:])
-        else:
-            np.subtract(arr[1:], arr[:-1], out=enc[1:])
+        enc[1:] = np.diff(arr)
         return enc
     def decode(self, buf, out=None):
@@ -85,13 +80,11 @@ class Delta(Codec):
         np.cumsum(enc, out=dec)
         # handle output
-        out = ndarray_copy(dec, out)
-        return out
+        return ndarray_copy(dec, out)
     def get_config(self):
         # override to handle encoding dtypes
-        return dict(id=self.codec_id, dtype=self.dtype.str, astype=self.astype.str)
+        return {'id': self.codec_id, 'dtype': self.dtype.str, 'astype': self.astype.str}
     def __repr__(self):
         r = f'{type(self).__name__}(dtype={self.dtype.str!r}'

numcodecs/errors.py ADDED Viewed

@@ -0,0 +1,26 @@
+"""
+This module defines custom exceptions that are raised in the `numcodecs` codebase.
+"""
+class UnknownCodecError(ValueError):
+    """
+    An exception that is raised when trying to receive a codec that has not been registered.
+    Parameters
+    ----------
+    codec_id : str
+        Codec identifier.
+    Examples
+    ----------
+    >>> import numcodecs
+    >>> numcodecs.get_codec({"codec_id": "unknown"})
+    Traceback (most recent call last):
+        ...
+    UnknownCodecError: codec not available: 'unknown'
+    """
+    def __init__(self, codec_id: str):
+        self.codec_id = codec_id
+        super().__init__(f"codec not available: '{codec_id}'")

numcodecs/fixedscaleoffset.py CHANGED Viewed

@@ -94,9 +94,7 @@ class FixedScaleOffset(Codec):
         enc = np.around(enc)
         # convert dtype
-        enc = enc.astype(self.astype, copy=False)
-        return enc
+        return enc.astype(self.astype, copy=False)
     def decode(self, buf, out=None):
         # interpret buffer as numpy array
@@ -116,13 +114,13 @@ class FixedScaleOffset(Codec):
     def get_config(self):
         # override to handle encoding dtypes
-        return dict(
-            id=self.codec_id,
-            scale=self.scale,
-            offset=self.offset,
-            dtype=self.dtype.str,
-            astype=self.astype.str,
-        )
+        return {
+            'id': self.codec_id,
+            'scale': self.scale,
+            'offset': self.offset,
+            'dtype': self.dtype.str,
+            'astype': self.astype.str,
+        }
     def __repr__(self):
         r = f'{type(self).__name__}(scale={self.scale}, offset={self.offset}, dtype={self.dtype.str!r}'

numcodecs/fletcher32.cp311-win_amd64.pyd CHANGED Viewed

Binary file

numcodecs/gzip.py CHANGED Viewed

@@ -28,9 +28,7 @@ class GZip(Codec):
         compressed = io.BytesIO()
         with _gzip.GzipFile(fileobj=compressed, mode='wb', compresslevel=self.level) as compressor:
             compressor.write(buf)
-        compressed = compressed.getvalue()
-        return compressed
+        return compressed.getvalue()
     # noinspection PyMethodMayBeStatic
     def decode(self, buf, out=None):

numcodecs/jenkins.cp311-win_amd64.pyd CHANGED Viewed

Binary file

numcodecs/json.py CHANGED Viewed

@@ -52,23 +52,23 @@ class JSON(Codec):
             else:
                 separators = ', ', ': '
         separators = tuple(separators)
-        self._encoder_config = dict(
-            skipkeys=skipkeys,
-            ensure_ascii=ensure_ascii,
-            check_circular=check_circular,
-            allow_nan=allow_nan,
-            indent=indent,
-            separators=separators,
-            sort_keys=sort_keys,
-        )
+        self._encoder_config = {
+            'skipkeys': skipkeys,
+            'ensure_ascii': ensure_ascii,
+            'check_circular': check_circular,
+            'allow_nan': allow_nan,
+            'indent': indent,
+            'separators': separators,
+            'sort_keys': sort_keys,
+        }
         self._encoder = _json.JSONEncoder(**self._encoder_config)
-        self._decoder_config = dict(strict=strict)
+        self._decoder_config = {'strict': strict}
         self._decoder = _json.JSONDecoder(**self._decoder_config)
     def encode(self, buf):
         try:
             buf = np.asarray(buf)
-        except ValueError:
+        except ValueError:  # pragma: no cover
             buf = np.asarray(buf, dtype=object)
         items = np.atleast_1d(buf).tolist()
         items.append(buf.dtype.str)
@@ -89,7 +89,7 @@ class JSON(Codec):
             return dec
     def get_config(self):
-        config = dict(id=self.codec_id, encoding=self._text_encoding)
+        config = {'id': self.codec_id, 'encoding': self._text_encoding}
         config.update(self._encoder_config)
         config.update(self._decoder_config)
         return config

numcodecs/lz4.cp311-win_amd64.pyd CHANGED Viewed

Binary file

numcodecs/lzma.py CHANGED Viewed

@@ -5,7 +5,7 @@ _lzma: Optional[ModuleType] = None
 try:
     import lzma as _lzma
 except ImportError:  # pragma: no cover
-    try:
+    try:  # noqa: SIM105
         from backports import lzma as _lzma  # type: ignore[no-redef]
     except ImportError:
         pass

numcodecs/msgpacks.py CHANGED Viewed

@@ -75,12 +75,12 @@ class MsgPack(Codec):
             return dec
     def get_config(self):
-        return dict(
-            id=self.codec_id,
-            raw=self.raw,
-            use_single_float=self.use_single_float,
-            use_bin_type=self.use_bin_type,
-        )
+        return {
+            'id': self.codec_id,
+            'raw': self.raw,
+            'use_single_float': self.use_single_float,
+            'use_bin_type': self.use_bin_type,
+        }
     def __repr__(self):
         return f'MsgPack(raw={self.raw!r}, use_bin_type={self.use_bin_type!r}, use_single_float={self.use_single_float!r})'

numcodecs/ndarray_like.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, ClassVar, Optional, Protocol, runtime_checkable
+from typing import Any, ClassVar, Protocol, runtime_checkable
 class _CachedProtocolMeta(Protocol.__class__):  # type: ignore[name-defined]
@@ -53,7 +53,7 @@ class NDArrayLike(Protocol, metaclass=_CachedProtocolMeta):
     def __setitem__(self, key, value): ...  # pragma: no cover
-    def tobytes(self, order: Optional[str] = ...) -> bytes: ...  # pragma: no cover
+    def tobytes(self, order: str | None = ...) -> bytes: ...  # pragma: no cover
     def reshape(self, *shape: int, order: str = ...) -> "NDArrayLike": ...  # pragma: no cover

numcodecs/pcodec.py CHANGED Viewed

@@ -1,13 +1,8 @@
-from typing import Literal, Optional
+from typing import Literal
 from numcodecs.abc import Codec
 from numcodecs.compat import ensure_contiguous_ndarray
-try:
-    from pcodec import ChunkConfig, ModeSpec, PagingSpec, standalone
-except ImportError:  # pragma: no cover
-    standalone = None
+from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
 DEFAULT_MAX_PAGE_N = 262144
@@ -27,14 +22,21 @@ class PCodec(Codec):
     level : int
         A compression level from 0-12, where 12 take the longest and compresses
         the most.
-    delta_encoding_order : init or None
-        Either a delta encoding level from 0-7 or None. If set to None, pcodec
-        will try to infer the optimal delta encoding order.
-    mode_spec : {'auto', 'classic'}
+    mode_spec : {"auto", "classic"}
         Configures whether Pcodec should try to infer the best "mode" or
         structure of the data (e.g. approximate multiples of 0.1) to improve
         compression ratio, or skip this step and just use the numbers as-is
-        (Classic mode).
+        (Classic mode). Note that the "try*" specs are not currently supported.
+    delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
+        Configures the delta encoding strategy. By default, uses "auto" which
+        will try to infer the best encoding order.
+    paging_spec : {"equal_pages_up_to"}
+        Configures the paging strategy. Only "equal_pages_up_to" is currently
+        supported.
+    delta_encoding_order : int or None
+        Explicit delta encoding level from 0-7. Only valid if delta_spec is
+        "try_consecutive" or "auto" (to support backwards compatibility with
+        older versions of this codec).
     equal_pages_up_to : int
         Divide the chunk into equal pages of up to this many numbers.
     """
@@ -44,39 +46,67 @@ class PCodec(Codec):
     def __init__(
         self,
         level: int = 8,
-        delta_encoding_order: Optional[int] = None,
-        equal_pages_up_to: int = 262144,
-        # TODO one day, add support for the Try* mode specs
-        mode_spec: Literal['auto', 'classic'] = 'auto',
+        *,
+        mode_spec: Literal["auto", "classic"] = "auto",
+        delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
+        paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
+        delta_encoding_order: int | None = None,
+        equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
     ):
-        if standalone is None:  # pragma: no cover
-            raise ImportError("pcodec must be installed to use the PCodec codec.")
         # note that we use `level` instead of `compression_level` to
         # match other codecs
         self.level = level
+        self.mode_spec = mode_spec
+        self.delta_spec = delta_spec
+        self.paging_spec = paging_spec
         self.delta_encoding_order = delta_encoding_order
         self.equal_pages_up_to = equal_pages_up_to
-        self.mode_spec = mode_spec
-    def encode(self, buf):
-        buf = ensure_contiguous_ndarray(buf)
+    def _get_chunk_config(self):
         match self.mode_spec:
-            case 'auto':
+            case "auto":
                 mode_spec = ModeSpec.auto()
-            case 'classic':
+            case "classic":
                 mode_spec = ModeSpec.classic()
             case _:
-                raise ValueError(f"unknown value for mode_spec: {self.mode_spec}")
-        paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
+                raise ValueError(f"mode_spec {self.mode_spec} is not supported")
+        if self.delta_encoding_order is not None and self.delta_spec == "auto":
+            # backwards compat for before delta_spec was introduced
+            delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
+        elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
+            raise ValueError(
+                "delta_encoding_order can only be set for delta_spec='try_consecutive'"
+            )
+        else:
+            match self.delta_spec:
+                case "auto":
+                    delta_spec = DeltaSpec.auto()
+                case "none":
+                    delta_spec = DeltaSpec.none()
+                case "try_consecutive":
+                    delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
+                case "try_lookback":
+                    delta_spec = DeltaSpec.try_lookback()
+                case _:
+                    raise ValueError(f"delta_spec {self.delta_spec} is not supported")
+        match self.paging_spec:
+            case "equal_pages_up_to":
+                paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
+            case _:
+                raise ValueError(f"paging_spec {self.paging_spec} is not supported")
-        config = ChunkConfig(
+        return ChunkConfig(
             compression_level=self.level,
-            delta_encoding_order=self.delta_encoding_order,
+            delta_spec=delta_spec,
             mode_spec=mode_spec,
             paging_spec=paging_spec,
         )
+    def encode(self, buf):
+        buf = ensure_contiguous_ndarray(buf)
+        config = self._get_chunk_config()
         return standalone.simple_compress(buf, config)
     def decode(self, buf, out=None):

numcodecs/pickles.py CHANGED Viewed

@@ -49,7 +49,7 @@ class Pickle(Codec):
             return dec
     def get_config(self):
-        return dict(id=self.codec_id, protocol=self.protocol)
+        return {'id': self.codec_id, 'protocol': self.protocol}
     def __repr__(self):
         return f'Pickle(protocol={self.protocol})'

numcodecs/quantize.py CHANGED Viewed

@@ -65,17 +65,15 @@ class Quantize(Codec):
         precision = 10.0**-self.digits
         exp = math.log10(precision)
         if exp < 0:
-            exp = int(math.floor(exp))
+            exp = math.floor(exp)
         else:
-            exp = int(math.ceil(exp))
+            exp = math.ceil(exp)
         bits = math.ceil(math.log2(10.0**-exp))
         scale = 2.0**bits
         enc = np.around(scale * arr) / scale
         # cast dtype
-        enc = enc.astype(self.astype, copy=False)
-        return enc
+        return enc.astype(self.astype, copy=False)
     def decode(self, buf, out=None):
         # filter is lossy, decoding is no-op
@@ -85,12 +83,12 @@ class Quantize(Codec):
     def get_config(self):
         # override to handle encoding dtypes
-        return dict(
-            id=self.codec_id,
-            digits=self.digits,
-            dtype=self.dtype.str,
-            astype=self.astype.str,
-        )
+        return {
+            'id': self.codec_id,
+            'digits': self.digits,
+            'dtype': self.dtype.str,
+            'astype': self.astype.str,
+        }
     def __repr__(self):
         r = f'{type(self).__name__}(digits={self.digits}, dtype={self.dtype.str!r}'

numcodecs/registry.py CHANGED Viewed

@@ -5,10 +5,11 @@ import logging
 from importlib.metadata import EntryPoints, entry_points
 from numcodecs.abc import Codec
+from numcodecs.errors import UnknownCodecError
 logger = logging.getLogger("numcodecs")
 codec_registry: dict[str, Codec] = {}
-entries: dict[str, "EntryPoints"] = {}
+entries: dict[str, EntryPoints] = {}
 def run_entrypoints():
@@ -50,7 +51,7 @@ def get_codec(config):
         register_codec(cls, codec_id=codec_id)
     if cls:
         return cls.from_config(config)
-    raise ValueError(f'codec not available: {codec_id!r}')
+    raise UnknownCodecError(f"{codec_id!r}")
 def register_codec(cls, codec_id=None):

numcodecs/tests/common.py CHANGED Viewed

@@ -7,8 +7,7 @@ import numpy as np
 import pytest
 from numpy.testing import assert_array_almost_equal, assert_array_equal
-# star import needed for repr tests so eval finds names
-from numcodecs import *  # noqa: F403
+from numcodecs import *  # noqa: F403  # for eval to find names in repr tests
 from numcodecs.compat import ensure_bytes, ensure_ndarray
 from numcodecs.registry import get_codec
@@ -19,9 +18,9 @@ greetings = [
     'Hei maailma!',
     'Xin chào thế giới',
     'Njatjeta Botë!',
-    'Γεια σου κόσμε!',
+    'Γεια σου κόσμε!',  # noqa: RUF001
     'こんにちは世界',
-    '世界，你好！',
+    '世界，你好！',  # noqa: RUF001
     'Helló, világ!',
     'Zdravo svete!',
     'เฮลโลเวิลด์',