cast-value 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cast_value/__init__.py +10 -0
- cast_value/_version.py +24 -0
- cast_value/_version.pyi +2 -0
- cast_value/core.py +247 -0
- cast_value/py.typed +0 -0
- cast_value/types.py +28 -0
- cast_value/zarr_compat/__init__.py +14 -0
- cast_value/zarr_compat/v1/__init__.py +23 -0
- cast_value/zarr_compat/v1/_base.py +234 -0
- cast_value/zarr_compat/v1/numpy_codec.py +34 -0
- cast_value/zarr_compat/v1/rust_codec.py +59 -0
- cast_value-0.1.0.dist-info/METADATA +69 -0
- cast_value-0.1.0.dist-info/RECORD +15 -0
- cast_value-0.1.0.dist-info/WHEEL +4 -0
- cast_value-0.1.0.dist-info/licenses/LICENSE +19 -0
cast_value/__init__.py
ADDED
cast_value/_version.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# file generated by vcs-versioning
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
# Names exported by this module.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Type declarations for the attributes assigned below.
version: str
__version__: str
__version_tuple__: tuple[int | str, ...]
version_tuple: tuple[int | str, ...]
commit_id: str | None
__commit_id__: str | None

# Each value is bound to both a dunder name and a plain alias.
__version__ = version = '0.1.0'
__version_tuple__ = version_tuple = (0, 1, 0)

# No commit id is recorded for this build.
__commit_id__ = commit_id = None
|
cast_value/_version.pyi
ADDED
cast_value/core.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Literal
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from cast_value.types import (
|
|
9
|
+
MapEntry,
|
|
10
|
+
OutOfRangeMode,
|
|
11
|
+
RoundingMode,
|
|
12
|
+
ScalarMapJSON,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def apply_scalar_map(work: np.ndarray, entries: list[MapEntry]) -> None:
    """Apply scalar map entries to *work* in place.

    Every element is rewritten at most once: the first entry whose source
    matches the element's *original* value wins. Entries are never chained,
    so a map such as ``[(0, 1), (1, 0)]`` swaps the two values instead of
    collapsing both onto ``0``.

    Parameters
    ----------
    work
        Array that is modified in place.
    entries
        ``(source, target)`` pairs. A NaN source matches every NaN element.
    """
    # With a single entry nothing can chain, so the bookkeeping mask is only
    # allocated when there are at least two entries.
    already_mapped = np.zeros(work.shape, dtype=bool) if len(entries) > 1 else None
    for src, tgt in entries:
        if isinstance(src, (float, np.floating)) and np.isnan(src):
            # NaN never compares equal to itself, so match it explicitly.
            mask = np.isnan(work)
        else:
            mask = work == src
        if already_mapped is not None:
            # Skip elements an earlier entry already rewrote; their current
            # value is an output, not an input.
            mask = mask & ~already_mapped
            already_mapped |= mask
        work[mask] = tgt
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def round_inplace(arr: np.ndarray, mode: RoundingMode) -> np.ndarray:
|
|
27
|
+
"""Round array, returning result (may or may not be a new array).
|
|
28
|
+
|
|
29
|
+
For nearest-away, requires 3 numpy ops. All others are a single op.
|
|
30
|
+
"""
|
|
31
|
+
match mode:
|
|
32
|
+
case "nearest-even":
|
|
33
|
+
return np.rint(arr) # type: ignore [no-any-return]
|
|
34
|
+
case "towards-zero":
|
|
35
|
+
return np.trunc(arr) # type: ignore [no-any-return]
|
|
36
|
+
case "towards-positive":
|
|
37
|
+
return np.ceil(arr) # type: ignore [no-any-return]
|
|
38
|
+
case "towards-negative":
|
|
39
|
+
return np.floor(arr) # type: ignore [no-any-return]
|
|
40
|
+
case "nearest-away":
|
|
41
|
+
return np.sign(arr) * np.floor(np.abs(arr) + 0.5) # type: ignore [no-any-return]
|
|
42
|
+
msg = f"Unknown rounding mode: {mode}"
|
|
43
|
+
raise ValueError(msg)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def cast_array(
    arr: np.ndarray,
    *,
    target_dtype: np.dtype,
    rounding_mode: RoundingMode,
    out_of_range_mode: OutOfRangeMode | None,
    scalar_map_entries: list[MapEntry] | None,
) -> np.ndarray:
    """Convert *arr* to *target_dtype*, honouring rounding, range and scalar-map options.

    This is the public entry point; the work is delegated to
    ``_cast_array_impl``. The delegation runs inside
    ``np.errstate(over="raise", invalid="raise")`` so that overflow and
    invalid-value conditions reported by numpy surface as exceptions rather
    than warnings.
    """
    strict_fp_errors = np.errstate(over="raise", invalid="raise")
    with strict_fp_errors:
        converted = _cast_array_impl(
            arr,
            target_dtype=target_dtype,
            rounding=rounding_mode,
            out_of_range=out_of_range_mode,
            scalar_map_entries=scalar_map_entries,
        )
    return converted
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def check_int_range(
|
|
75
|
+
work: np.ndarray,
|
|
76
|
+
*,
|
|
77
|
+
target_dtype: np.dtype,
|
|
78
|
+
out_of_range: OutOfRangeMode | None,
|
|
79
|
+
) -> np.ndarray:
|
|
80
|
+
"""Check integer range and apply out-of-range handling, then cast."""
|
|
81
|
+
info = np.iinfo(target_dtype)
|
|
82
|
+
lo, hi = int(info.min), int(info.max)
|
|
83
|
+
w_min, w_max = int(work.min()), int(work.max())
|
|
84
|
+
if w_min >= lo and w_max <= hi:
|
|
85
|
+
return work.astype(target_dtype)
|
|
86
|
+
match out_of_range:
|
|
87
|
+
case "clamp":
|
|
88
|
+
return np.clip(work, lo, hi).astype(target_dtype)
|
|
89
|
+
case "wrap":
|
|
90
|
+
range_size = hi - lo + 1
|
|
91
|
+
return ((work.astype(np.int64) - lo) % range_size + lo).astype(target_dtype)
|
|
92
|
+
case None:
|
|
93
|
+
oor_vals = work[(work < lo) | (work > hi)]
|
|
94
|
+
msg = (
|
|
95
|
+
f"Values out of range for {target_dtype} (valid range: [{lo}, {hi}]), "
|
|
96
|
+
f"got values in [{w_min}, {w_max}]. "
|
|
97
|
+
f"Out-of-range values: {oor_vals.ravel()!r}. "
|
|
98
|
+
f"Set out_of_range='clamp' or out_of_range='wrap' to handle this."
|
|
99
|
+
)
|
|
100
|
+
raise ValueError(msg)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _cast_float(
|
|
104
|
+
arr: np.ndarray,
|
|
105
|
+
target_dtype: np.dtype,
|
|
106
|
+
rounding: RoundingMode,
|
|
107
|
+
) -> np.ndarray:
|
|
108
|
+
"""Cast a float (or int) array to a float target dtype, respecting the rounding mode.
|
|
109
|
+
|
|
110
|
+
numpy's ``astype`` always uses nearest-even. For other rounding modes we
|
|
111
|
+
detect which values lost precision and correct them by choosing between
|
|
112
|
+
the two adjacent representable values in the target dtype.
|
|
113
|
+
"""
|
|
114
|
+
result = arr.astype(target_dtype)
|
|
115
|
+
|
|
116
|
+
if rounding == "nearest-even":
|
|
117
|
+
return result
|
|
118
|
+
|
|
119
|
+
# Widen source to a float type so we can compare. For integer sources,
|
|
120
|
+
# float64 is the widest available; for float sources, keep the original dtype.
|
|
121
|
+
wide_dtype = np.float64 if np.issubdtype(arr.dtype, np.integer) else arr.dtype
|
|
122
|
+
|
|
123
|
+
wide_src = arr.astype(wide_dtype)
|
|
124
|
+
roundtrip = result.astype(wide_dtype)
|
|
125
|
+
inexact = roundtrip != wide_src
|
|
126
|
+
|
|
127
|
+
# Skip NaN/Inf — they are exact in any float type that supports them.
|
|
128
|
+
if np.issubdtype(wide_dtype, np.floating):
|
|
129
|
+
inexact &= np.isfinite(wide_src)
|
|
130
|
+
|
|
131
|
+
if not inexact.any():
|
|
132
|
+
return result
|
|
133
|
+
|
|
134
|
+
# For inexact values, ``result`` holds the nearest-even candidate.
|
|
135
|
+
# The other adjacent representable value is one ULP towards the original.
|
|
136
|
+
ne = result[inexact] # nearest-even candidates
|
|
137
|
+
src = wide_src[inexact] # original values in wide dtype
|
|
138
|
+
ne_wide = ne.astype(wide_dtype)
|
|
139
|
+
|
|
140
|
+
# If nearest-even rounded up (ne > original), the other candidate is one ULP lower.
|
|
141
|
+
# If nearest-even rounded down (ne < original), the other candidate is one ULP higher.
|
|
142
|
+
toward = np.where(ne_wide > src, np.float64(-np.inf), np.float64(np.inf)).astype(
|
|
143
|
+
target_dtype
|
|
144
|
+
)
|
|
145
|
+
other = np.nextafter(ne, toward)
|
|
146
|
+
other_wide = other.astype(wide_dtype)
|
|
147
|
+
|
|
148
|
+
match rounding:
|
|
149
|
+
case "towards-zero":
|
|
150
|
+
use_other = np.abs(other_wide) < np.abs(ne_wide)
|
|
151
|
+
case "towards-positive":
|
|
152
|
+
use_other = other_wide > ne_wide
|
|
153
|
+
case "towards-negative":
|
|
154
|
+
use_other = other_wide < ne_wide
|
|
155
|
+
case "nearest-away":
|
|
156
|
+
use_other = np.abs(other_wide) > np.abs(ne_wide)
|
|
157
|
+
|
|
158
|
+
corrected = result.copy()
|
|
159
|
+
indices = np.where(inexact)[0]
|
|
160
|
+
corrected[indices[use_other]] = other[use_other]
|
|
161
|
+
return corrected
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _cast_array_impl(
    arr: np.ndarray,
    *,
    target_dtype: np.dtype,
    rounding: RoundingMode,
    out_of_range: OutOfRangeMode | None,
    scalar_map_entries: list[MapEntry] | None,
) -> np.ndarray:
    """Dispatch a cast by source kind, target kind and scalar-map presence.

    A dtype counts as "int" when it is a numpy integer subtype; anything else
    takes the "float" path. The input array is never modified: the scalar map
    is applied to a widened copy.

    Raises
    ------
    ValueError
        For a float source and integer target, if NaN or infinity remains
        after the scalar map has been applied.
    """
    source_is_int = np.issubdtype(arr.dtype, np.integer)
    target_is_int = np.issubdtype(target_dtype, np.integer)

    # ---- any source -> float target ------------------------------------
    if not target_is_int:
        if not scalar_map_entries:
            return _cast_float(arr, target_dtype, rounding)
        # Integer sources are widened to float64 before mapping; float
        # sources are copied so the caller's array stays untouched.
        buffer = arr.astype(np.float64) if source_is_int else arr.copy()
        apply_scalar_map(buffer, scalar_map_entries)
        return _cast_float(buffer, target_dtype, rounding)

    # ---- int source -> int target --------------------------------------
    if source_is_int:
        if scalar_map_entries:
            buffer = arr.astype(np.int64)
            apply_scalar_map(buffer, scalar_map_entries)
            return check_int_range(
                buffer, target_dtype=target_dtype, out_of_range=out_of_range
            )
        if arr.dtype.itemsize > target_dtype.itemsize or arr.dtype != target_dtype:
            return check_int_range(
                arr, target_dtype=target_dtype, out_of_range=out_of_range
            )
        return arr.astype(target_dtype)

    # ---- float source -> int target ------------------------------------
    buffer = arr.copy() if arr.dtype == np.float64 else arr.astype(np.float64)

    if scalar_map_entries:
        apply_scalar_map(buffer, scalar_map_entries)

    # Any non-finite value still present has no integer representation.
    if not np.isfinite(buffer).all():
        msg = "Cannot cast NaN or Infinity to integer type without scalar_map"
        raise ValueError(msg)

    buffer = round_inplace(buffer, rounding)
    return check_int_range(
        buffer, target_dtype=target_dtype, out_of_range=out_of_range
    )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def extract_raw_map(
    data: ScalarMapJSON | None, direction: str
) -> dict[str, str] | None:
    """Return the ``direction`` pairs of a scalar_map as a ``{str: str}`` dict.

    *direction* is the key to read (``"encode"`` or ``"decode"``). Both
    members of every pair are converted with ``str``. ``None`` is returned
    when *data* is ``None``, the key is absent, or the pair list is empty.
    """
    if data is None:
        return None
    stringified: dict[str, str] = {
        str(source): str(target)
        for source, target in data.get(direction, [])  # type: ignore[attr-defined]
    }
    return stringified if stringified else None
|
cast_value/py.typed
ADDED
|
File without changes
|
cast_value/types.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Literal, NotRequired, TypeAlias, TypedDict
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
# Any numpy integer or floating-point scalar.
NumericScalar: TypeAlias = np.integer | np.floating

# Names of the supported rounding modes.
RoundingMode = Literal[
    "nearest-even",
    "towards-zero",
    "towards-positive",
    "towards-negative",
    "nearest-away",
]

# Names of the supported out-of-range handling modes.
OutOfRangeMode = Literal["clamp", "wrap"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ScalarMapJSON(TypedDict):
    """JSON representation of the scalar_map codec configuration field.

    Both keys are optional; each holds a list of ``(source, target)`` pairs.
    """

    # Pairs stored under the "encode" key.
    encode: NotRequired[list[tuple[object, object]]]
    # Pairs stored under the "decode" key.
    decode: NotRequired[list[tuple[object, object]]]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Pre-parsed scalar map entry: (source_scalar, target_scalar), both numpy
# numeric scalars.
MapEntry = tuple[NumericScalar, NumericScalar]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# Probe for zarr up front so a missing dependency produces an actionable
# message instead of a failure deeper inside the v1 package import.
try:
    import zarr  # noqa: F401
except ImportError as e:
    # NOTE(review): the message recommends the 'cast-value[zarr]' extra;
    # confirm that such an extra is actually declared in the package metadata.
    _MSG = (
        "The 'zarr' package is required to use cast_value.zarr_compat. "
        "Install it with: pip install 'cast-value[zarr]'"
    )
    raise ImportError(_MSG) from e

# Re-export the v1 codec classes at package level.
from cast_value.zarr_compat.v1 import CastValue, CastValueNumpy, CastValueRust

__all__ = ["CastValue", "CastValueNumpy", "CastValueRust"]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""cast_value codec implementations for the current zarr-python codec API.
|
|
2
|
+
|
|
3
|
+
Everything in this package depends on zarr-python's ``ArrayArrayCodec`` ABC
|
|
4
|
+
and related types. When zarr-python ships a new codec interface, a ``v2``
|
|
5
|
+
package can be added alongside this one.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from cast_value.zarr_compat.v1._base import CastValueBase, parse_map_entries
|
|
11
|
+
from cast_value.zarr_compat.v1.numpy_codec import CastValueNumpy
|
|
12
|
+
from cast_value.zarr_compat.v1.rust_codec import CastValueRust
|
|
13
|
+
|
|
14
|
+
# Backwards-compatible alias
|
|
15
|
+
CastValue = CastValueNumpy
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"CastValue",
|
|
19
|
+
"CastValueBase",
|
|
20
|
+
"CastValueNumpy",
|
|
21
|
+
"CastValueRust",
|
|
22
|
+
"parse_map_entries",
|
|
23
|
+
]
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Base class and helpers for the cast_value zarr codec (v1 API)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, replace
|
|
6
|
+
from typing import TYPE_CHECKING, cast
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from zarr.abc.codec import ArrayArrayCodec
|
|
10
|
+
from zarr.core.common import JSON, parse_named_configuration
|
|
11
|
+
from zarr.core.dtype import get_data_type_from_json
|
|
12
|
+
|
|
13
|
+
from cast_value.core import extract_raw_map
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from typing import Self
|
|
17
|
+
|
|
18
|
+
from zarr.core.array_spec import ArraySpec
|
|
19
|
+
from zarr.core.buffer import NDBuffer
|
|
20
|
+
from zarr.core.chunk_grids import ChunkGrid
|
|
21
|
+
from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
|
|
22
|
+
|
|
23
|
+
from cast_value.types import (
|
|
24
|
+
MapEntry,
|
|
25
|
+
OutOfRangeMode,
|
|
26
|
+
RoundingMode,
|
|
27
|
+
ScalarMapJSON,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_map_entries(
    mapping: dict[str, str],
    src_dtype: ZDType[TBaseDType, TBaseScalar],
    tgt_dtype: ZDType[TBaseDType, TBaseScalar],
) -> list[MapEntry]:
    """Turn a raw scalar map into a list of ``(source, target)`` scalar pairs.

    Keys are deserialized with ``src_dtype.from_json_scalar`` and values with
    ``tgt_dtype.from_json_scalar`` (both with ``zarr_format=3``), so each side
    is parsed by its own data type. The mapping's iteration order is kept.
    """
    return [
        cast(
            "MapEntry",
            (
                src_dtype.from_json_scalar(raw_source, zarr_format=3),
                tgt_dtype.from_json_scalar(raw_target, zarr_format=3),
            ),
        )
        for raw_source, raw_target in mapping.items()
    ]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(frozen=True)
class CastValueBase(ArrayArrayCodec):
    """Shared implementation of the cast-value array-to-array codec.

    Metadata handling, validation and (de)serialization live here; concrete
    subclasses only supply ``_cast_array``, which performs the actual cast.

    Parameters
    ----------
    data_type : str
        Target zarr v3 data type name (e.g. "uint8", "float32").
    rounding : RoundingMode
        How to round when exact representation is impossible. Default is "nearest-even".
    out_of_range : OutOfRangeMode or None
        What to do when a value is outside the target's range.
        None means error. "clamp" clips to range. "wrap" uses modular arithmetic
        (only valid for integer types).
    scalar_map : dict or None
        Explicit value overrides as JSON: {"encode": [[src, tgt], ...], "decode": [[src, tgt], ...]}.
    """

    is_fixed_size = True

    dtype: ZDType[TBaseDType, TBaseScalar]
    rounding: RoundingMode
    out_of_range: OutOfRangeMode | None
    scalar_map: ScalarMapJSON | None

    def __init__(
        self,
        *,
        data_type: str | ZDType[TBaseDType, TBaseScalar],
        rounding: RoundingMode = "nearest-even",
        out_of_range: OutOfRangeMode | None = None,
        scalar_map: ScalarMapJSON | None = None,
    ) -> None:
        """Store the configuration, resolving a data type name to a ZDType."""
        resolved = (
            get_data_type_from_json(data_type, zarr_format=3)
            if isinstance(data_type, str)
            else data_type
        )
        # The dataclass is frozen, so fields are written through
        # ``object.__setattr__``.
        for field_name, field_value in (
            ("dtype", resolved),
            ("rounding", rounding),
            ("out_of_range", out_of_range),
            ("scalar_map", scalar_map),
        ):
            object.__setattr__(self, field_name, field_value)

    @classmethod
    def from_dict(cls, data: dict[str, JSON]) -> Self:
        """Build a codec from its ``{"name": ..., "configuration": ...}`` JSON form."""
        parsed = parse_named_configuration(
            data, "cast_value", require_configuration=True
        )
        configuration_parsed = parsed[1]
        return cls(**configuration_parsed)  # type: ignore[arg-type]  # ty: ignore[invalid-argument-type]

    def to_dict(self) -> dict[str, JSON]:
        """Serialize the codec, leaving out options that hold their default value."""
        config: dict[str, JSON] = {}
        config["data_type"] = cast("JSON", self.dtype.to_json(zarr_format=3))
        if self.rounding != "nearest-even":
            config["rounding"] = self.rounding
        if self.out_of_range is not None:
            config["out_of_range"] = self.out_of_range
        if self.scalar_map is not None:
            config["scalar_map"] = cast("JSON", self.scalar_map)
        return {"name": "cast_value", "configuration": config}

    def validate(
        self,
        *,
        shape: tuple[int, ...],  # noqa: ARG002
        dtype: ZDType[TBaseDType, TBaseScalar],
        chunk_grid: ChunkGrid,  # noqa: ARG002
    ) -> None:
        """Reject non-numeric dtypes and ``wrap`` on a non-integer target."""
        source_native = dtype.to_native_dtype()
        target_native = self.dtype.to_native_dtype()
        for label, dt in (("source", source_native), ("target", target_native)):
            is_numeric = np.issubdtype(dt, np.integer) or np.issubdtype(
                dt, np.floating
            )
            if is_numeric:
                continue
            msg = (
                f"cast_value codec only supports integer and floating-point data types. "
                f"Got {label} dtype {dt}."
            )
            raise ValueError(msg)
        wraps = self.out_of_range == "wrap"
        if wraps and not np.issubdtype(target_native, np.integer):
            msg = "out_of_range='wrap' is only valid for integer target types."
            raise ValueError(msg)

    def _cast_array(
        self,
        arr: np.ndarray,
        *,
        target_dtype: np.dtype,
        scalar_map_entries: list[MapEntry] | None,
    ) -> np.ndarray:
        """Cast *arr* to *target_dtype*. Subclasses must override this."""
        raise NotImplementedError

    def _scalar_map_entries(
        self,
        direction: str,
        src_dtype: ZDType[TBaseDType, TBaseScalar],
        tgt_dtype: ZDType[TBaseDType, TBaseScalar],
    ) -> list[MapEntry] | None:
        """Parse the *direction* half of ``scalar_map``; ``None`` when it is empty."""
        raw = extract_raw_map(self.scalar_map, direction)
        if not raw:
            return None
        return parse_map_entries(raw, src_dtype, tgt_dtype)

    def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
        """Return *chunk_spec* with the target dtype and a converted fill value."""
        target_zdtype = self.dtype
        target_native = target_zdtype.to_native_dtype()
        source_native = chunk_spec.dtype.to_native_dtype()

        # The fill value goes through the same encode path as chunk data,
        # wrapped in a one-element array.
        fill_arr = np.array([chunk_spec.fill_value], dtype=source_native)
        encode_entries = self._scalar_map_entries(
            "encode", chunk_spec.dtype, self.dtype
        )
        new_fill_arr = self._cast_array(
            fill_arr,
            target_dtype=target_native,
            scalar_map_entries=encode_entries,
        )
        new_fill = target_native.type(new_fill_arr[0])

        return replace(chunk_spec, dtype=target_zdtype, fill_value=new_fill)

    def _encode_sync(
        self,
        chunk_array: NDBuffer,
        _chunk_spec: ArraySpec,
    ) -> NDBuffer | None:
        """Cast a chunk from the spec's dtype to the codec's target dtype."""
        arr = chunk_array.as_ndarray_like()
        target_native = self.dtype.to_native_dtype()
        encode_entries = self._scalar_map_entries(
            "encode", _chunk_spec.dtype, self.dtype
        )
        result = self._cast_array(
            np.asarray(arr),
            target_dtype=target_native,
            scalar_map_entries=encode_entries,
        )
        return chunk_array.__class__.from_ndarray_like(result)  # ty: ignore[invalid-argument-type]

    async def _encode_single(
        self,
        chunk_data: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer | None:
        """Async wrapper around ``_encode_sync``."""
        return self._encode_sync(chunk_data, chunk_spec)

    def _decode_sync(
        self,
        chunk_array: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """Cast a chunk from the codec's dtype back to the spec's dtype."""
        arr = chunk_array.as_ndarray_like()
        target_native = chunk_spec.dtype.to_native_dtype()
        decode_entries = self._scalar_map_entries(
            "decode", self.dtype, chunk_spec.dtype
        )
        result = self._cast_array(
            np.asarray(arr),
            target_dtype=target_native,
            scalar_map_entries=decode_entries,
        )
        return chunk_array.__class__.from_ndarray_like(result)  # ty: ignore[invalid-argument-type]

    async def _decode_single(
        self,
        chunk_data: NDBuffer,
        chunk_spec: ArraySpec,
    ) -> NDBuffer:
        """Async wrapper around ``_decode_sync``."""
        return self._decode_sync(chunk_data, chunk_spec)

    def compute_encoded_size(
        self, input_byte_length: int, chunk_spec: ArraySpec
    ) -> int:
        """Scale *input_byte_length* by the target/source item-size ratio."""
        source_itemsize = chunk_spec.dtype.to_native_dtype().itemsize
        target_itemsize = self.dtype.to_native_dtype().itemsize
        if source_itemsize == 0:  # pragma: no cover
            return 0
        element_count = input_byte_length // source_itemsize
        return element_count * target_itemsize
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Cast-value codec backed by the pure-numpy implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from cast_value.core import cast_array
|
|
9
|
+
from cast_value.zarr_compat.v1._base import CastValueBase
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
from cast_value.types import MapEntry
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True, init=False)
class CastValueNumpy(CastValueBase):
    """Cast-value codec whose casting is done by ``cast_value.core.cast_array``."""

    def _cast_array(
        self,
        arr: np.ndarray,
        *,
        target_dtype: np.dtype,
        scalar_map_entries: list[MapEntry] | None,
    ) -> np.ndarray:
        """Forward to ``cast_array`` using this codec's rounding and range settings."""
        converted = cast_array(
            arr,
            target_dtype=target_dtype,
            rounding_mode=self.rounding,
            out_of_range_mode=self.out_of_range,
            scalar_map_entries=scalar_map_entries,
        )
        return converted
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Cast-value codec backed by the cast-value-rs Rust implementation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from cast_value.zarr_compat.v1._base import CastValueBase
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from cast_value.types import MapEntry
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _dtype_to_str(dtype: np.dtype) -> str:
|
|
17
|
+
"""Convert a numpy dtype to the string name expected by cast-value-rs."""
|
|
18
|
+
return dtype.name
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _convert_scalar_map(
|
|
22
|
+
entries: list[MapEntry] | None,
|
|
23
|
+
) -> list[tuple[int | float, int | float]] | None:
|
|
24
|
+
"""Convert scalar map entries to plain Python types for cast-value-rs.
|
|
25
|
+
|
|
26
|
+
cast-value-rs accepts Python int/float but rejects np.floating scalars.
|
|
27
|
+
"""
|
|
28
|
+
if entries is None:
|
|
29
|
+
return None
|
|
30
|
+
result: list[tuple[int | float, int | float]] = []
|
|
31
|
+
for src, tgt in entries:
|
|
32
|
+
src_py: int | float = int(src) if isinstance(src, np.integer) else float(src)
|
|
33
|
+
tgt_py: int | float = int(tgt) if isinstance(tgt, np.integer) else float(tgt)
|
|
34
|
+
result.append((src_py, tgt_py))
|
|
35
|
+
return result
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True, init=False)
class CastValueRust(CastValueBase):
    """Cast-value codec whose casting is done by ``cast_value_rs.cast_array``."""

    def _cast_array(
        self,
        arr: np.ndarray,
        *,
        target_dtype: np.dtype,
        scalar_map_entries: list[MapEntry] | None,
    ) -> np.ndarray:
        """Forward to the Rust ``cast_array`` with a dtype name and plain-Python map entries."""
        # Imported here rather than at module level, so cast_value_rs is only
        # needed once a cast is actually performed.
        from cast_value_rs import (  # noqa: PLC0415  # pylint: disable=import-outside-toplevel
            cast_array as rs_cast_array,
        )

        converted = rs_cast_array(
            arr=arr,
            target_dtype=_dtype_to_str(target_dtype),  # ty: ignore[invalid-argument-type]
            rounding_mode=self.rounding,
            out_of_range_mode=self.out_of_range,
            scalar_map_entries=_convert_scalar_map(scalar_map_entries),
        )
        return converted
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cast-value
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python implementation of the `cast_value` codec.
|
|
5
|
+
Project-URL: Homepage, https://github.com/zarr-developers/cast-value
|
|
6
|
+
Project-URL: Bug Tracker, https://github.com/zarr-developers/cast-value/issues
|
|
7
|
+
Project-URL: Discussions, https://github.com/zarr-developers/cast-value/discussions
|
|
8
|
+
Project-URL: Changelog, https://github.com/zarr-developers/cast-value/releases
|
|
9
|
+
Author-email: Davis Bennett <davis.v.bennett@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Classifier: Development Status :: 1 - Planning
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.11
|
|
26
|
+
Requires-Dist: numpy>=2.4.3
|
|
27
|
+
Requires-Dist: zarr>=3.1.6
|
|
28
|
+
Provides-Extra: rs
|
|
29
|
+
Requires-Dist: cast-value-rs; extra == 'rs'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
# cast-value
|
|
33
|
+
|
|
34
|
+
[![Actions Status][actions-badge]][actions-link]
|
|
35
|
+
[![Documentation Status][rtd-badge]][rtd-link]
|
|
36
|
+
|
|
37
|
+
[![PyPI version][pypi-version]][pypi-link]
|
|
38
|
+
[![Conda-Forge][conda-badge]][conda-link]
|
|
39
|
+
[![PyPI platforms][pypi-platforms]][pypi-link]
|
|
40
|
+
|
|
41
|
+
[![GitHub Discussion][github-discussions-badge]][github-discussions-link]
|
|
42
|
+
|
|
43
|
+
[![Coverage][coverage-badge]][coverage-link]
|
|
44
|
+
|
|
45
|
+
<!-- prettier-ignore-start -->
|
|
46
|
+
[actions-badge]: https://github.com/zarr-developers/cast-value/actions/workflows/ci.yml/badge.svg
|
|
47
|
+
[actions-link]: https://github.com/zarr-developers/cast-value/actions
|
|
48
|
+
[conda-badge]: https://img.shields.io/conda/vn/conda-forge/cast-value
|
|
49
|
+
[conda-link]: https://github.com/conda-forge/cast-value-feedstock
|
|
50
|
+
[github-discussions-badge]: https://img.shields.io/static/v1?label=Discussions&message=Ask&color=blue&logo=github
|
|
51
|
+
[github-discussions-link]: https://github.com/zarr-developers/cast-value/discussions
|
|
52
|
+
[pypi-link]: https://pypi.org/project/cast-value/
|
|
53
|
+
[pypi-platforms]: https://img.shields.io/pypi/pyversions/cast-value
|
|
54
|
+
[pypi-version]: https://img.shields.io/pypi/v/cast-value
|
|
55
|
+
[rtd-badge]: https://readthedocs.org/projects/cast-value/badge/?version=latest
|
|
56
|
+
[rtd-link]: https://cast-value.readthedocs.io/en/latest/?badge=latest
|
|
57
|
+
[coverage-badge]: https://codecov.io/github/zarr-developers/cast-value/branch/main/graph/badge.svg
|
|
58
|
+
[coverage-link]: https://codecov.io/github/zarr-developers/cast-value
|
|
59
|
+
|
|
60
|
+
<!-- prettier-ignore-end -->
|
|
61
|
+
|
|
62
|
+
# cast-value.py
|
|
63
|
+
|
|
64
|
+
Python implementation of the `cast_value` codec for Zarr.
|
|
65
|
+
|
|
66
|
+
## `cast_value` codec
|
|
67
|
+
|
|
68
|
+
The `cast_value` codec defines an operation for safely converting an array from
|
|
69
|
+
one numeric data type to another.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
cast_value/__init__.py,sha256=wOsAQnva6XpiepVMwm8gJ3ifXxytE1FgjJnDpp5bp7M,233
|
|
2
|
+
cast_value/_version.py,sha256=n_5vdJsPNu7wZ57LGuRL585uvll-hiuvZUBWzdG0RQU,520
|
|
3
|
+
cast_value/_version.pyi,sha256=o7uNL6MhuJoiqpEnriU7rBT6TmkJZA-i2qMoNz9YcgQ,82
|
|
4
|
+
cast_value/core.py,sha256=gjKClyuMhTvGao45HySphi9g7OjsVKdoBb8DOjqG5Gs,9039
|
|
5
|
+
cast_value/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
cast_value/types.py,sha256=jqTBgWv1LAD5JQb2saeU9qcXZSU9cVTig4zeuioam8k,676
|
|
7
|
+
cast_value/zarr_compat/__init__.py,sha256=y6F55EMdekcuzKGka-6mWcu28nbJZj5adgMc7qGY64w,421
|
|
8
|
+
cast_value/zarr_compat/v1/__init__.py,sha256=amFI9R0jO8NjZBsb6N0CRNyJghhcaatNRNEIGhMA9o0,691
|
|
9
|
+
cast_value/zarr_compat/v1/_base.py,sha256=LGpKW1sVgQOD7dHCCgj_62xOmf7Qq0HhHSkRnwqKSxM,8242
|
|
10
|
+
cast_value/zarr_compat/v1/numpy_codec.py,sha256=vCvfA1gj58mF7FL0Z4E4GFNw5wyGhtihTtmWveA5CtU,904
|
|
11
|
+
cast_value/zarr_compat/v1/rust_codec.py,sha256=ZaAR8iRakIm9j8oDnmHnAGAp3G6xgdZwEU_MGORRaG4,1881
|
|
12
|
+
cast_value-0.1.0.dist-info/METADATA,sha256=LqPLoMA1sPRy7yFUw5jIo_f5Dym_bKIrUgenssnRVGk,3095
|
|
13
|
+
cast_value-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
14
|
+
cast_value-0.1.0.dist-info/licenses/LICENSE,sha256=-ScGXcCIbt9h97OPndFxUZOn2Dkk9C-PQGAw4kH1Uf0,1053
|
|
15
|
+
cast_value-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Copyright 2026 Davis Bennett
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
|
5
|
+
the Software without restriction, including without limitation the rights to
|
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
|
8
|
+
so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|