ocf-data-sampler 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ocf-data-sampler might be problematic.

@@ -4,8 +4,7 @@ from glob import glob
 
  import xarray as xr
 
- from ocf_data_sampler.load.open_tensorstore_zarrs import open_zarrs
- from ocf_data_sampler.load.xarray_tensorstore import open_zarr
+ from ocf_data_sampler.load.open_xarray_tensorstore import open_zarr, open_zarrs
 
 
  def open_zarr_paths(
@@ -0,0 +1,167 @@
+ """Utilities for loading TensorStore data into Xarray.
+
+ This module uses and adapts internal functions from the Google xarray-tensorstore project [1],
+ licensed under the Apache License, Version 2.0. See [2] for details.
+
+ Modifications copyright 2025 Open Climate Fix. Licensed under the MIT License.
+
+ Modifications from the original include:
+ - Adding support for opening multiple zarr files as a single xarray object
+ - Support for zarr 3 (see https://github.com/google/xarray-tensorstore/pull/22)
+
+ References:
+ [1] https://github.com/google-research/tensorstore/blob/main/tensorstore/xarray.py
+ [2] https://www.apache.org/licenses/LICENSE-2.0
+ """
+
+ import os.path
+ import re
+
+ import tensorstore as ts
+ import xarray as xr
+ import zarr
+ from xarray_tensorstore import (
+     _DEFAULT_STORAGE_DRIVER,
+     _raise_if_mask_and_scale_used_for_data_vars,
+     _TensorStoreAdapter,
+ )
+
+
+ def _zarr_spec_from_path(path: str, zarr_format: int) -> ...:
+     if re.match(r"\w+\://", path):  # path is a URI
+         kv_store = path
+     else:
+         kv_store = {"driver": _DEFAULT_STORAGE_DRIVER, "path": path}
+     return {"driver": f"zarr{zarr_format}", "kvstore": kv_store}
+
+
+ def _get_data_variable_array_futures(
+     path: str,
+     context: ts.Context | None,
+     variables: list[str],
+ ) -> dict[str, ts.Future]:
+     """Open all data variables in a zarr group and return futures.
+
+     Args:
+         path: path or URI to zarr group to open.
+         context: TensorStore configuration options to use when opening arrays.
+         variables: The variables in the zarr group to open.
+     """
+     zarr_format = zarr.open(path).metadata.zarr_format
+     specs = {k: _zarr_spec_from_path(os.path.join(path, k), zarr_format) for k in variables}
+     return {k: ts.open(spec, read=True, write=False, context=context) for k, spec in specs.items()}
+
+
+ def _tensorstore_open_zarrs(
+     paths: list[str],
+     data_vars: list[str],
+     concat_axes: list[int],
+     context: ts.Context,
+ ) -> dict[str, ts.TensorStore]:
+     """Open multiple zarrs with TensorStore.
+
+     Args:
+         paths: List of paths to zarr stores.
+         data_vars: List of data variable names to open.
+         concat_axes: List of axes along which to concatenate the data variables.
+         context: TensorStore context.
+     """
+     # Open all the variables from all the datasets - returned as futures
+     arrays_list: list[dict[str, ts.Future]] = []
+     for path in paths:
+         arrays_list.append(_get_data_variable_array_futures(path, context, data_vars))
+
+     # Wait for the async open operations
+     arrays_list = [{k: v.result() for k, v in arrays.items()} for arrays in arrays_list]
+
+     # Concatenate each of the variables along the required axis
+     arrays = {}
+     for k, axis in zip(data_vars, concat_axes, strict=True):
+         variable_arrays = [d[k] for d in arrays_list]
+         arrays[k] = ts.concat(variable_arrays, axis=axis)
+
+     return arrays
+
+
+ def open_zarr(
+     path: str,
+     context: ts.Context | None = None,
+     mask_and_scale: bool = True,
+ ) -> xr.Dataset:
+     """Open an xarray.Dataset from zarr using TensorStore.
+
+     Args:
+         path: path or URI to zarr group to open.
+         context: TensorStore configuration options to use when opening arrays.
+         mask_and_scale: if True (default), attempt to apply masking and scaling like
+             xarray.open_zarr(). This is only supported for coordinate variables and
+             otherwise will raise an error.
+
+     Returns:
+         Dataset with all data variables opened via TensorStore.
+     """
+     if context is None:
+         context = ts.Context()
+
+     # Avoid using dask by setting `chunks=None`
+     ds = xr.open_zarr(path, chunks=None, mask_and_scale=mask_and_scale)
+
+     if mask_and_scale:
+         _raise_if_mask_and_scale_used_for_data_vars(ds)
+
+     # Open all data variables using tensorstore - returned as futures
+     data_vars = list(ds.data_vars)
+     arrays = _get_data_variable_array_futures(path, context, data_vars)
+
+     # Wait for the async open operations
+     arrays = {k: v.result() for k, v in arrays.items()}
+
+     # Adapt the tensorstore arrays and plug them into the xarray object
+     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
+
+     return ds.copy(data=new_data)
+
+
+ def open_zarrs(
+     paths: list[str],
+     concat_dim: str,
+     context: ts.Context | None = None,
+     mask_and_scale: bool = True,
+ ) -> xr.Dataset:
+     """Open multiple zarrs with TensorStore.
+
+     Args:
+         paths: List of paths to zarr stores.
+         concat_dim: Dimension along which to concatenate the data variables.
+         context: TensorStore context.
+         mask_and_scale: Whether to mask and scale the data.
+
+     Returns:
+         Concatenated Dataset with all data variables opened via TensorStore.
+     """
+     if context is None:
+         context = ts.Context()
+
+     ds_list = [xr.open_zarr(p, mask_and_scale=mask_and_scale, decode_timedelta=True) for p in paths]
+     ds = xr.concat(
+         ds_list,
+         dim=concat_dim,
+         data_vars="minimal",
+         compat="equals",
+         combine_attrs="drop_conflicts",
+     )
+
+     if mask_and_scale:
+         _raise_if_mask_and_scale_used_for_data_vars(ds)
+
+     # Find the axis along which each data array must be concatenated
+     data_vars = list(ds.data_vars)
+     concat_axes = [ds[v].dims.index(concat_dim) for v in data_vars]
+
+     # Open and concat all zarrs so each variable is a single TensorStore array
+     arrays = _tensorstore_open_zarrs(paths, data_vars, concat_axes, context)
+
+     # Plug the arrays into the xarray object
+     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
+
+     return ds.copy(data=new_data)
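
For reference, a minimal sketch of how the two entry points added above can be called; the store paths here are hypothetical, and the real call sites live in the loaders changed below:

    import tensorstore as ts

    from ocf_data_sampler.load.open_xarray_tensorstore import open_zarr, open_zarrs

    # Single zarr store: data variables come back lazily as TensorStore-backed arrays
    ds = open_zarr("satellite.zarr")  # hypothetical path

    # Several stores concatenated along a shared dimension
    ds_multi = open_zarrs(
        ["sat_2023.zarr", "sat_2024.zarr"],  # hypothetical paths
        concat_dim="time",
        context=ts.Context(),
    )
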
@@ -2,14 +2,12 @@
  import numpy as np
  import xarray as xr
 
+ from ocf_data_sampler.load.open_xarray_tensorstore import open_zarr, open_zarrs
  from ocf_data_sampler.load.utils import (
      check_time_unique_increasing,
      get_xr_data_array_from_xr_dataset,
      make_spatial_coords_increasing,
  )
- from ocf_data_sampler.load.xarray_tensorstore import open_zarr
-
- from .open_tensorstore_zarrs import open_zarrs
 
 
  def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ocf-data-sampler
- Version: 0.5.5
+ Version: 0.5.7
  Author: James Fulton, Peter Dudfield
  Author-email: Open Climate Fix team <info@openclimatefix.org>
  License: MIT License
@@ -44,7 +44,7 @@ Requires-Dist: pyproj
  Requires-Dist: pyaml_env
  Requires-Dist: pyresample
  Requires-Dist: h5netcdf
- Requires-Dist: tensorstore
+ Requires-Dist: xarray-tensorstore==0.1.5
  Requires-Dist: zarr>=3
 
  # ocf-data-sampler
@@ -63,6 +63,12 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
  > [!Note]
  > This repository is still in early development and large changes to the user-facing functions may still occur.
 
+ ## Licence
+
+ This project is primarily licensed under the MIT License (see LICENSE).
+
+ It includes and adapts internal functions from the Google xarray-tensorstore project, licensed under the Apache License, Version 2.0.
+
 
  ## Documentation
  **ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
@@ -9,11 +9,10 @@ ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKF
  ocf_data_sampler/load/__init__.py,sha256=-vQP9g0UOWdVbjEGyVX_ipa7R1btmiETIKAf6aw4d78,201
  ocf_data_sampler/load/gsp.py,sha256=d30jQWnwFaLj6rKNMHdz1qD8fzF8q--RNnEXT7bGiX0,2981
  ocf_data_sampler/load/load_dataset.py,sha256=K8rWykjII-3g127If7WRRFivzHNx3SshCvZj4uQlf28,2089
- ocf_data_sampler/load/open_tensorstore_zarrs.py,sha256=ElXmW7GhYDpsHZr7KjM-KIDNJMc4lmgzVIBwHx5Wl0Q,2748
- ocf_data_sampler/load/satellite.py,sha256=X5ZqFfMgab_WDwI7w1ZmdyMeh3GwV1g7mBd8tFgr8dM,1862
+ ocf_data_sampler/load/open_xarray_tensorstore.py,sha256=kAqlIavGe1dcCPkzAtoZo2dFS-tW36E-wRE_3w1HMfg,5620
+ ocf_data_sampler/load/satellite.py,sha256=B-m0_Py_D0GwzwX5o-ixyeXntV5Z4k4MbmMBHZLUWMM,1831
  ocf_data_sampler/load/site.py,sha256=WtOy20VMHJIY0IwEemCdcecSDUGcVaLUown-4ixJw90,2147
  ocf_data_sampler/load/utils.py,sha256=AGL0aOOQPrgqNBTjlBtR7Qg1PyQov3DFJo-y198u8pY,2044
- ocf_data_sampler/load/xarray_tensorstore.py,sha256=DSZl364Hn3QjcVxxPmBKU9rsc5BlJBdzL_SMrv-9os0,10997
  ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
  ocf_data_sampler/load/nwp/nwp.py,sha256=0E9shei3Mq1N7F-fBlEKY5Hm0_kI7ysY_rffnWIshvk,3612
  ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -22,7 +21,7 @@ ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=P7JqfssmQq8eHKKXaBexsxts325A
  ocf_data_sampler/load/nwp/providers/gfs.py,sha256=h6vm-Rfz1JGOE4P_fP1_XQJ3bugNbeNAIyt56N8B1Dc,1066
  ocf_data_sampler/load/nwp/providers/icon.py,sha256=iVZwLKRr_D74_kAu5MHir6pRKEfbTmIxFRZAxzmiYdI,1257
  ocf_data_sampler/load/nwp/providers/ukv.py,sha256=2i32VM9gnmWUpbL0qBSp_AKzuyKucXZPS8yklbcGlbc,1039
- ocf_data_sampler/load/nwp/providers/utils.py,sha256=5LrLmy74AVY5uLwL2qEhy-yPqSYLoxOgN8W1v8FmaQA,2355
+ ocf_data_sampler/load/nwp/providers/utils.py,sha256=IjJ3w7zDgXNFaVa4TMk8yVCvdzfrIRu5tn1OaaQ7Zso,2304
  ocf_data_sampler/numpy_sample/__init__.py,sha256=5bdpzM8hMAEe0XRSZ9AZFQdqEeBsEPhaF79Y8bDx3GQ,407
  ocf_data_sampler/numpy_sample/collate.py,sha256=hoxIc5SoHoIs3Nx37aRZzWChpswjy9lHUgaKgHIoo80,2039
  ocf_data_sampler/numpy_sample/common_types.py,sha256=9CjYHkUTx0ObduWh43fhsybZCTXvexql7qC2ptMDoek,377
@@ -57,7 +56,7 @@ ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul
  scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
  utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
- ocf_data_sampler-0.5.5.dist-info/METADATA,sha256=R9MPrxfVGCnkBbUehSjd3taDZxeREDo_YaIv5ccqnyg,12581
- ocf_data_sampler-0.5.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- ocf_data_sampler-0.5.5.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
- ocf_data_sampler-0.5.5.dist-info/RECORD,,
+ ocf_data_sampler-0.5.7.dist-info/METADATA,sha256=Nu2RLYiLYyU6nkLu8g__Q8EPFIgYMLu5cZLcLXAckXs,12816
+ ocf_data_sampler-0.5.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ ocf_data_sampler-0.5.7.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ ocf_data_sampler-0.5.7.dist-info/RECORD,,
@@ -1,93 +0,0 @@
- """Open multiple zarrs with TensorStore.
-
- This extends the functionality of xarray_tensorstore to open multiple zarr stores.
- """
-
- import os
-
- import tensorstore as ts
- import xarray as xr
-
- from ocf_data_sampler.load.xarray_tensorstore import (
-     _raise_if_mask_and_scale_used_for_data_vars,
-     _TensorStoreAdapter,
-     _zarr_spec_from_path,
- )
-
-
- def tensorstore_open_multi_zarrs(
-     paths: list[str],
-     data_vars: list[str],
-     concat_axes: list[int],
-     context: ts.Context,
-     write: bool,
- ) -> dict[str, ts.TensorStore]:
-     """Open multiple zarrs with TensorStore.
-
-     Args:
-         paths: List of paths to zarr stores.
-         data_vars: List of data variable names to open.
-         concat_axes: List of axes along which to concatenate the data variables.
-         context: TensorStore context.
-         write: Whether to open the stores for writing.
-     """
-     arrays_list = []
-     for path in paths:
-         specs = {k: _zarr_spec_from_path(os.path.join(path, k)) for k in data_vars}
-         array_futures = {
-             k: ts.open(spec, read=True, write=write, context=context)
-             for k, spec in specs.items()
-         }
-         arrays_list.append({k: v.result() for k, v in array_futures.items()})
-
-     arrays = {}
-     for k, axis in zip(data_vars, concat_axes, strict=False):
-         datasets = [d[k] for d in arrays_list]
-         arrays[k] = ts.concat(datasets, axis=axis)
-
-     return arrays
-
-
- def open_zarrs(
-     paths: list[str],
-     concat_dim: str,
-     *,
-     context: ts.Context | None = None,
-     mask_and_scale: bool = True,
-     write: bool = False,
- ) -> xr.Dataset:
-     """Open multiple zarrs with TensorStore.
-
-     Args:
-         paths: List of paths to zarr stores.
-         concat_dim: Dimension along which to concatenate the data variables.
-         context: TensorStore context.
-         mask_and_scale: Whether to mask and scale the data.
-         write: Whether to open the stores for writing.
-     """
-     if context is None:
-         context = ts.Context()
-
-     ds = xr.open_mfdataset(
-         paths,
-         concat_dim=concat_dim,
-         combine="nested",
-         mask_and_scale=mask_and_scale,
-         decode_timedelta=True,
-     )
-
-     if mask_and_scale:
-         # Data variables get replaced below with _TensorStoreAdapter arrays, which
-         # don't get masked or scaled. Raising an error avoids surprising users with
-         # incorrect data values.
-         _raise_if_mask_and_scale_used_for_data_vars(ds)
-
-     data_vars = list(ds.data_vars)
-
-     concat_axes = [ds[v].dims.index(concat_dim) for v in data_vars]
-
-     arrays = tensorstore_open_multi_zarrs(paths, data_vars, concat_axes, context, write)
-
-     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
-
-     return ds.copy(data=new_data)
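
Compared with this deleted module, the replacement open_zarrs shown earlier drops the write flag and builds the combined Dataset with an explicit xr.concat rather than xr.open_mfdataset. A rough sketch of the two approaches, assuming hypothetical stores that differ only along "time":

    import xarray as xr

    paths = ["a.zarr", "b.zarr"]  # hypothetical stores

    # Old approach (this file): nested combine via open_mfdataset
    ds_old = xr.open_mfdataset(
        paths, concat_dim="time", combine="nested",
        mask_and_scale=True, decode_timedelta=True,
    )

    # New approach (open_xarray_tensorstore.open_zarrs): explicit concat
    ds_new = xr.concat(
        [xr.open_zarr(p, mask_and_scale=True, decode_timedelta=True) for p in paths],
        dim="time", data_vars="minimal", compat="equals",
        combine_attrs="drop_conflicts",
    )
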
@@ -1,299 +0,0 @@
- # Copyright 2023 Google LLC
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """Utilities for loading TensorStore data into Xarray.
-
- Copied from https://github.com/google-research/tensorstore/blob/main/tensorstore/xarray.py,
- with small changes so that it works with zarr 3:
- https://github.com/google/xarray-tensorstore/pull/22
- """
- from __future__ import annotations
-
- import dataclasses
- import math
- import os.path
- import re
- from typing import TypeVar
-
- import numpy as np
- import tensorstore
- import xarray
- import zarr
- from xarray.core import indexing
-
- __version__ = "0.1.5"  # keep in sync with setup.py
-
-
- Index = TypeVar("Index", int, slice, np.ndarray, None)
- XarrayData = TypeVar("XarrayData", xarray.Dataset, xarray.DataArray)
-
-
- def _numpy_to_tensorstore_index(index: Index, size: int) -> Index:
-     """Switch from NumPy to TensorStore indexing conventions."""
-     # https://google.github.io/tensorstore/python/indexing.html#differences-compared-to-numpy-indexing
-     if index is None:
-         return None
-     elif isinstance(index, int):
-         # Negative integers do not count from the end in TensorStore
-         return index + size if index < 0 else index
-     elif isinstance(index, slice):
-         start = _numpy_to_tensorstore_index(index.start, size)
-         stop = _numpy_to_tensorstore_index(index.stop, size)
-         if stop is not None:
-             # TensorStore does not allow out of bounds slicing
-             stop = min(stop, size)
-         return slice(start, stop, index.step)
-     else:
-         assert isinstance(index, np.ndarray)  # noqa S101
-         return np.where(index < 0, index + size, index)
-
-
- @dataclasses.dataclass(frozen=True)
- class _TensorStoreAdapter(indexing.ExplicitlyIndexed):
-     """TensorStore array that can be wrapped by xarray.Variable.
-
-     We use Xarray's semi-internal ExplicitlyIndexed API so that Xarray will not
-     attempt to load our array into memory as a NumPy array. In the future, this
-     should be supported by public Xarray APIs, as part of the refactor discussed
-     in: https://github.com/pydata/xarray/issues/3981
-     """
-
-     array: tensorstore.TensorStore
-     future: tensorstore.Future | None = None
-
-     @property
-     def shape(self) -> tuple[int, ...]:
-         return self.array.shape
-
-     @property
-     def dtype(self) -> np.dtype:
-         return self.array.dtype.numpy_dtype
-
-     @property
-     def ndim(self) -> int:
-         return len(self.shape)
-
-     @property
-     def size(self) -> int:
-         return math.prod(self.shape)
-
-     def __getitem__(self, key: indexing.ExplicitIndexer) -> _TensorStoreAdapter:
-         index_tuple = tuple(map(_numpy_to_tensorstore_index, key.tuple, self.shape))
-         if isinstance(key, indexing.OuterIndexer):
-             # TODO(shoyer): fix this for newer versions of Xarray.
-             # We get the error message:
-             # AttributeError: '_TensorStoreAdapter' object has no attribute 'oindex'
-             indexed = self.array.oindex[index_tuple]
-         elif isinstance(key, indexing.VectorizedIndexer):
-             indexed = self.array.vindex[index_tuple]
-         else:
-             assert isinstance(key, indexing.BasicIndexer)  # noqa S101
-             indexed = self.array[index_tuple]
-         # Translate to the origin so repeated indexing is relative to the new bounds
-         # like NumPy, not absolute like TensorStore
-         translated = indexed[tensorstore.d[:].translate_to[0]]
-         return type(self)(translated)
-
-     def __setitem__(self, key: indexing.ExplicitIndexer, value) -> None:  # noqa ANN001
-         index_tuple = tuple(map(_numpy_to_tensorstore_index, key.tuple, self.shape))
-         if isinstance(key, indexing.OuterIndexer):
-             self.array.oindex[index_tuple] = value
-         elif isinstance(key, indexing.VectorizedIndexer):
-             self.array.vindex[index_tuple] = value
-         else:
-             assert isinstance(key, indexing.BasicIndexer)  # noqa S101
-             self.array[index_tuple] = value
-         # Invalidate the future so that the next read will pick up the new value
-         object.__setattr__(self, "future", None)
-
-     # xarray>2024.02.0 uses oindex and vindex properties, which are expected to
-     # return objects whose __getitem__ method supports the appropriate form of
-     # indexing.
-     @property
-     def oindex(self) -> _TensorStoreAdapter:
-         return self
-
-     @property
-     def vindex(self) -> _TensorStoreAdapter:
-         return self
-
-     def transpose(self, order: tuple[int, ...]) -> _TensorStoreAdapter:
-         transposed = self.array[tensorstore.d[order].transpose[:]]
-         return type(self)(transposed)
-
-     def read(self) -> _TensorStoreAdapter:
-         future = self.array.read()
-         return type(self)(self.array, future)
-
-     def __array__(self, dtype: np.dtype | None = None) -> np.ndarray:  # type: ignore
-         future = self.array.read() if self.future is None else self.future
-         return np.asarray(future.result(), dtype=dtype)
-
-     def get_duck_array(self) -> np.ndarray:
-         # Special method for xarray to return an in-memory (computed) representation
-         return np.asarray(self)
-
-     # Work around the missing __copy__ and __deepcopy__ methods from TensorStore,
-     # which are needed for Xarray:
-     # https://github.com/google/tensorstore/issues/109
-     # TensorStore objects are immutable, so there's no need to actually copy them.
-
-     def __copy__(self) -> _TensorStoreAdapter:
-         return type(self)(self.array, self.future)
-
-     def __deepcopy__(self, memo) -> _TensorStoreAdapter:  # noqa ANN001
-         return self.__copy__()
-
-
- def _read_tensorstore(
-     array: indexing.ExplicitlyIndexed,
- ) -> indexing.ExplicitlyIndexed:
-     """Starts async reading on a TensorStore array."""
-     return array.read() if isinstance(array, _TensorStoreAdapter) else array
-
-
- def read(xarraydata: XarrayData, /) -> XarrayData:
-     """Starts async reads on all TensorStore arrays."""
-     # pylint: disable=protected-access
-     if isinstance(xarraydata, xarray.Dataset):
-         data = {
-             name: _read_tensorstore(var.variable._data)
-             for name, var in xarraydata.data_vars.items()
-         }
-     elif isinstance(xarraydata, xarray.DataArray):
-         data = _read_tensorstore(xarraydata.variable._data)
-     else:
-         raise TypeError(f"argument is not a DataArray or Dataset: {xarraydata}")
-     # pylint: enable=protected-access
-     return xarraydata.copy(data=data)
-
-
- _DEFAULT_STORAGE_DRIVER = "file"
-
-
- def _zarr_spec_from_path(path: str, zarr_format: int) -> ...:
-     if re.match(r"\w+\://", path):  # path is a URI
-         kv_store = path
-     else:
-         kv_store = {"driver": _DEFAULT_STORAGE_DRIVER, "path": path}
-
-     if zarr_format == 2:
-         return {"driver": "zarr2", "kvstore": kv_store}
-     else:
-         return {"driver": "zarr3", "kvstore": kv_store}
-
-
- def _raise_if_mask_and_scale_used_for_data_vars(ds: xarray.Dataset) -> None:
-     """Check a dataset for data variables that would need masking or scaling."""
-     advice = (
-         "Consider re-opening with xarray_tensorstore.open_zarr(..., "
-         "mask_and_scale=False), or falling back to use xarray.open_zarr()."
-     )
-     for k in ds:
-         encoding = ds[k].encoding
-         for attr in ["_FillValue", "missing_value"]:
-             fill_value = encoding.get(attr, np.nan)
-             if fill_value == fill_value:  # pylint: disable=comparison-with-itself
-                 raise ValueError(
-                     f"variable {k} has non-NaN fill value, which is not supported by"
-                     f" xarray-tensorstore: {fill_value}. {advice}",
-                 )
-         for attr in ["scale_factor", "add_offset"]:
-             if attr in encoding:
-                 raise ValueError(
-                     f"variable {k} uses scale/offset encoding, which is not supported"
-                     f" by xarray-tensorstore: {encoding}. {advice}",
-                 )
-
-
- def open_zarr(
-     path: str,
-     *,
-     context: tensorstore.Context | None = None,
-     mask_and_scale: bool = True,
-     write: bool = False,
- ) -> xarray.Dataset:
-     """Open an xarray.Dataset from Zarr using TensorStore.
-
-     For best performance, explicitly call `read()` to asynchronously load data
-     in parallel. Otherwise, xarray's `.compute()` method will load each variable's
-     data in sequence.
-
-     Example usage:
-
-         import xarray_tensorstore
-
-         ds = xarray_tensorstore.open_zarr(path)
-
-         # indexing & transposing is lazy
-         example = ds.sel(time='2020-01-01').transpose('longitude', 'latitude', ...)
-
-         # start reading data asynchronously
-         read_example = xarray_tensorstore.read(example)
-
-         # blocking conversion of the data into NumPy arrays
-         numpy_example = read_example.compute()
-
-     Args:
-         path: path or URI to Zarr group to open.
-         context: TensorStore configuration options to use when opening arrays.
-         mask_and_scale: if True (default), attempt to apply masking and scaling like
-             xarray.open_zarr(). This is only supported for coordinate variables and
-             otherwise will raise an error.
-         write: Allow write access. Defaults to False.
-
-     Returns:
-         Dataset with all data variables opened via TensorStore.
-     """
-     # We use xarray.open_zarr (which uses Zarr Python internally) to open the
-     # initial version of the dataset for a few reasons:
-     # 1. TensorStore does not support Zarr groups or array attributes, which we
-     #    need to open in the xarray.Dataset. We use Zarr Python instead of
-     #    parsing the raw Zarr metadata files ourselves.
-     # 2. TensorStore doesn't support non-standard Zarr dtypes like UTF-8 strings.
-     # 3. Xarray's open_zarr machinery does some pre-processing (e.g., from numeric
-     #    to datetime64 dtypes) that we would otherwise need to invoke explicitly
-     #    via xarray.decode_cf().
-     #
-     # Fortunately (2) and (3) are most commonly encountered on small coordinate
-     # arrays, for which the performance advantages of TensorStore are irrelevant.
-
-     if context is None:
-         context = tensorstore.Context()
-
-     # chunks=None means avoid using dask
-     ds = xarray.open_zarr(path, chunks=None, mask_and_scale=mask_and_scale)
-
-     # Find out whether the store is zarr format 2 or 3
-     try:
-         # This should work with zarr>=3 - https://github.com/zarr-developers/zarr-python
-         zarr_format = zarr.open(path).metadata.zarr_format
-     except:  # noqa E722
-         # If reading the metadata fails, assume zarr format 2
-         zarr_format = 2
-
-     if mask_and_scale:
-         # Data variables get replaced below with _TensorStoreAdapter arrays, which
-         # don't get masked or scaled. Raising an error avoids surprising users with
-         # incorrect data values.
-         _raise_if_mask_and_scale_used_for_data_vars(ds)
-
-     specs = {k: _zarr_spec_from_path(os.path.join(path, k), zarr_format) for k in ds}
-     array_futures = {
-         k: tensorstore.open(spec, read=True, write=write, context=context)
-         for k, spec in specs.items()
-     }
-     arrays = {k: v.result() for k, v in array_futures.items()}
-     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
-
-     return ds.copy(data=new_data)
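
The NumPy-to-TensorStore index conversion is the subtle part of this deleted file. A standalone, runnable restatement of its integer and slice rules, following the TensorStore indexing docs linked in the code (the helper name below is ours, not the library's):

    import numpy as np

    def to_tensorstore_index(index, size):
        """Sketch of _numpy_to_tensorstore_index for int, slice, and array indexes."""
        if index is None:
            return None
        if isinstance(index, int):
            # TensorStore has no negative indexing, so shift by the axis size
            return index + size if index < 0 else index
        if isinstance(index, slice):
            start = to_tensorstore_index(index.start, size)
            stop = to_tensorstore_index(index.stop, size)
            if stop is not None:
                # TensorStore rejects out-of-bounds slices, so clamp the stop
                stop = min(stop, size)
            return slice(start, stop, index.step)
        return np.where(index < 0, index + size, index)

    assert to_tensorstore_index(-1, 10) == 9
    assert to_tensorstore_index(slice(5, 100), 10) == slice(5, 10, None)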