ocf-data-sampler 0.5.2__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Note: this release of ocf-data-sampler has been flagged as potentially problematic.

--- a/ocf_data_sampler/load/nwp/providers/utils.py
+++ b/ocf_data_sampler/load/nwp/providers/utils.py
@@ -3,9 +3,9 @@
 from glob import glob
 
 import xarray as xr
-from xarray_tensorstore import open_zarr
 
 from ocf_data_sampler.load.open_tensorstore_zarrs import open_zarrs
+from ocf_data_sampler.load.xarray_tensorstore import open_zarr
 
 
 def open_zarr_paths(
--- a/ocf_data_sampler/load/open_tensorstore_zarrs.py
+++ b/ocf_data_sampler/load/open_tensorstore_zarrs.py
@@ -7,7 +7,8 @@ import os
 
 import tensorstore as ts
 import xarray as xr
-from xarray_tensorstore import (
+
+from ocf_data_sampler.load.xarray_tensorstore import (
     _raise_if_mask_and_scale_used_for_data_vars,
     _TensorStoreAdapter,
     _zarr_spec_from_path,
--- a/ocf_data_sampler/load/satellite.py
+++ b/ocf_data_sampler/load/satellite.py
@@ -1,13 +1,13 @@
 """Satellite loader."""
 import numpy as np
 import xarray as xr
-from xarray_tensorstore import open_zarr
 
 from ocf_data_sampler.load.utils import (
     check_time_unique_increasing,
     get_xr_data_array_from_xr_dataset,
     make_spatial_coords_increasing,
 )
+from ocf_data_sampler.load.xarray_tensorstore import open_zarr
 
 from .open_tensorstore_zarrs import open_zarrs
 
--- /dev/null
+++ b/ocf_data_sampler/load/xarray_tensorstore.py
@@ -0,0 +1,299 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Utilities for loading TensorStore data into Xarray.
+
+Copied from https://github.com/google-research/tensorstore/blob/main/tensorstore/xarray.py
+But we added small changes so that it works for zarr3
+https://github.com/google/xarray-tensorstore/pull/22
+"""
+from __future__ import annotations
+
+import dataclasses
+import math
+import os.path
+import re
+from typing import TypeVar
+
+import numpy as np
+import tensorstore
+import xarray
+import zarr
+from xarray.core import indexing
+
+__version__ = "0.1.5"  # keep in sync with setup.py
+
+
+Index = TypeVar("Index", int, slice, np.ndarray, None)
+XarrayData = TypeVar("XarrayData", xarray.Dataset, xarray.DataArray)
+
+
+def _numpy_to_tensorstore_index(index: Index, size: int) -> Index:
+    """Switch from NumPy to TensorStore indexing conventions."""
+    # https://google.github.io/tensorstore/python/indexing.html#differences-compared-to-numpy-indexing
+    if index is None:
+        return None
+    elif isinstance(index, int):
+        # Negative integers do not count from the end in TensorStore
+        return index + size if index < 0 else index
+    elif isinstance(index, slice):
+        start = _numpy_to_tensorstore_index(index.start, size)
+        stop = _numpy_to_tensorstore_index(index.stop, size)
+        if stop is not None:
+            # TensorStore does not allow out of bounds slicing
+            stop = min(stop, size)
+        return slice(start, stop, index.step)
+    else:
+        assert isinstance(index, np.ndarray)  # noqa S101
+        return np.where(index < 0, index + size, index)
+
+
+@dataclasses.dataclass(frozen=True)
+class _TensorStoreAdapter(indexing.ExplicitlyIndexed):
+    """TensorStore array that can be wrapped by xarray.Variable.
+
+    We use Xarray's semi-internal ExplicitlyIndexed API so that Xarray will not
+    attempt to load our array into memory as a NumPy array. In the future, this
+    should be supported by public Xarray APIs, as part of the refactor discussed
+    in: https://github.com/pydata/xarray/issues/3981
+    """
+
+    array: tensorstore.TensorStore
+    future: tensorstore.Future | None = None
+
+    @property
+    def shape(self) -> tuple[int, ...]:
+        return self.array.shape
+
+    @property
+    def dtype(self) -> np.dtype:
+        return self.array.dtype.numpy_dtype
+
+    @property
+    def ndim(self) -> int:
+        return len(self.shape)
+
+    @property
+    def size(self) -> int:
+        return math.prod(self.shape)
+
+    def __getitem__(self, key: indexing.ExplicitIndexer) -> _TensorStoreAdapter:
+        index_tuple = tuple(map(_numpy_to_tensorstore_index, key.tuple, self.shape))
+        if isinstance(key, indexing.OuterIndexer):
+            # TODO(shoyer): fix this for newer versions of Xarray.
+            # We get the error message:
+            # AttributeError: '_TensorStoreAdapter' object has no attribute 'oindex'
+            indexed = self.array.oindex[index_tuple]
+        elif isinstance(key, indexing.VectorizedIndexer):
+            indexed = self.array.vindex[index_tuple]
+        else:
+            assert isinstance(key, indexing.BasicIndexer)  # noqa S101
+            indexed = self.array[index_tuple]
+        # Translate to the origin so repeated indexing is relative to the new bounds
+        # like NumPy, not absolute like TensorStore
+        translated = indexed[tensorstore.d[:].translate_to[0]]
+        return type(self)(translated)
+
+    def __setitem__(self, key: indexing.ExplicitIndexer, value) -> None:  # noqa ANN001
+        index_tuple = tuple(map(_numpy_to_tensorstore_index, key.tuple, self.shape))
+        if isinstance(key, indexing.OuterIndexer):
+            self.array.oindex[index_tuple] = value
+        elif isinstance(key, indexing.VectorizedIndexer):
+            self.array.vindex[index_tuple] = value
+        else:
+            assert isinstance(key, indexing.BasicIndexer)  # noqa S101
+            self.array[index_tuple] = value
+        # Invalidate the future so that the next read will pick up the new value
+        object.__setattr__(self, "future", None)
+
+    # xarray>2024.02.0 uses oindex and vindex properties, which are expected to
+    # return objects whose __getitem__ method supports the appropriate form of
+    # indexing.
+    @property
+    def oindex(self) -> _TensorStoreAdapter:
+        return self
+
+    @property
+    def vindex(self) -> _TensorStoreAdapter:
+        return self
+
+    def transpose(self, order: tuple[int, ...]) -> _TensorStoreAdapter:
+        transposed = self.array[tensorstore.d[order].transpose[:]]
+        return type(self)(transposed)
+
+    def read(self) -> _TensorStoreAdapter:
+        future = self.array.read()
+        return type(self)(self.array, future)
+
+    def __array__(self, dtype: np.dtype | None = None) -> np.ndarray:  # type: ignore
+        future = self.array.read() if self.future is None else self.future
+        return np.asarray(future.result(), dtype=dtype)
+
+    def get_duck_array(self) -> np.ndarray:
+        # special method for xarray to return an in-memory (computed) representation
+        return np.asarray(self)
+
+    # Work around the missing __copy__ and __deepcopy__ methods from TensorStore,
+    # which are needed for Xarray:
+    # https://github.com/google/tensorstore/issues/109
+    # TensorStore objects are immutable, so there's no need to actually copy them.
+
+    def __copy__(self) -> _TensorStoreAdapter:
+        return type(self)(self.array, self.future)
+
+    def __deepcopy__(self, memo) -> _TensorStoreAdapter:  # noqa ANN001
+        return self.__copy__()
+
+
+def _read_tensorstore(
+    array: indexing.ExplicitlyIndexed,
+) -> indexing.ExplicitlyIndexed:
+    """Starts async reading on a TensorStore array."""
+    return array.read() if isinstance(array, _TensorStoreAdapter) else array
+
+
+def read(xarraydata: XarrayData, /) -> XarrayData:
+    """Starts async reads on all TensorStore arrays."""
+    # pylint: disable=protected-access
+    if isinstance(xarraydata, xarray.Dataset):
+        data = {
+            name: _read_tensorstore(var.variable._data)
+            for name, var in xarraydata.data_vars.items()
+        }
+    elif isinstance(xarraydata, xarray.DataArray):
+        data = _read_tensorstore(xarraydata.variable._data)
+    else:
+        raise TypeError(f"argument is not a DataArray or Dataset: {xarraydata}")
+    # pylint: enable=protected-access
+    return xarraydata.copy(data=data)
+
+
+_DEFAULT_STORAGE_DRIVER = "file"
+
+
+def _zarr_spec_from_path(path: str, zarr_format: int) -> ...:
+    if re.match(r"\w+\://", path):  # path is a URI
+        kv_store = path
+    else:
+        kv_store = {"driver": _DEFAULT_STORAGE_DRIVER, "path": path}
+
+    if zarr_format == 2:
+        return {"driver": "zarr2", "kvstore": kv_store}
+    else:
+        return {"driver": "zarr3", "kvstore": kv_store}
+
+
+def _raise_if_mask_and_scale_used_for_data_vars(ds: xarray.Dataset) -> None:
+    """Check a dataset for data variables that would need masking or scaling."""
+    advice = (
+        "Consider re-opening with xarray_tensorstore.open_zarr(..., "
+        "mask_and_scale=False), or falling back to use xarray.open_zarr()."
+    )
+    for k in ds:
+        encoding = ds[k].encoding
+        for attr in ["_FillValue", "missing_value"]:
+            fill_value = encoding.get(attr, np.nan)
+            if fill_value == fill_value:  # pylint: disable=comparison-with-itself
+                raise ValueError(
+                    f"variable {k} has non-NaN fill value, which is not supported by"
+                    f" xarray-tensorstore: {fill_value}. {advice}",
+                )
+        for attr in ["scale_factor", "add_offset"]:
+            if attr in encoding:
+                raise ValueError(
+                    f"variable {k} uses scale/offset encoding, which is not supported"
+                    f" by xarray-tensorstore: {encoding}. {advice}",
+                )
+
+
+def open_zarr(
+    path: str,
+    *,
+    context: tensorstore.Context | None = None,
+    mask_and_scale: bool = True,
+    write: bool = False,
+) -> xarray.Dataset:
+    """Open an xarray.Dataset from Zarr using TensorStore.
+
+    For best performance, explicitly call `read()` to asynchronously load data
+    in parallel. Otherwise, xarray's `.compute()` method will load each variable's
+    data in sequence.
+
+    Example usage:
+
+        import xarray_tensorstore
+
+        ds = xarray_tensorstore.open_zarr(path)
+
+        # indexing & transposing is lazy
+        example = ds.sel(time='2020-01-01').transpose('longitude', 'latitude', ...)
+
+        # start reading data asynchronously
+        read_example = xarray_tensorstore.read(example)
+
+        # blocking conversion of the data into NumPy arrays
+        numpy_example = read_example.compute()
+
+    Args:
+        path: path or URI to Zarr group to open.
+        context: TensorStore configuration options to use when opening arrays.
+        mask_and_scale: if True (default), attempt to apply masking and scaling like
+            xarray.open_zarr(). This is only supported for coordinate variables and
+            otherwise will raise an error.
+        write: Allow write access. Defaults to False.
+
+    Returns:
+        Dataset with all data variables opened via TensorStore.
+    """
+    # We use xarray.open_zarr (which uses Zarr Python internally) to open the
+    # initial version of the dataset for a few reasons:
+    # 1. TensorStore does not support Zarr groups or array attributes, which we
+    #    need to open in the xarray.Dataset. We use Zarr Python instead of
+    #    parsing the raw Zarr metadata files ourselves.
+    # 2. TensorStore doesn't support non-standard Zarr dtypes like UTF-8 strings.
+    # 3. Xarray's open_zarr machinery does some pre-processing (e.g., from numeric
+    #    to datetime64 dtypes) that we would otherwise need to invoke explicitly
+    #    via xarray.decode_cf().
+    #
+    # Fortunately (2) and (3) are most commonly encountered on small coordinate
+    # arrays, for which the performance advantages of TensorStore are irrelevant.
+
+    if context is None:
+        context = tensorstore.Context()
+
+    # chunks=None means avoid using dask
+    ds = xarray.open_zarr(path, chunks=None, mask_and_scale=mask_and_scale)
+
+    # find out if its 2 or 3
+    try:
+        # this should work with zarr>=3 - https://github.com/zarr-developers/zarr-python
+        zarr_format = zarr.open(path).metadata.zarr_format
+    except:  # noqa E722
+        # try to open it, but if it fails, assume zarr_format 2
+        zarr_format = 2
+
+    if mask_and_scale:
+        # Data variables get replaced below with _TensorStoreAdapter arrays, which
+        # don't get masked or scaled. Raising an error avoids surprising users with
+        # incorrect data values.
+        _raise_if_mask_and_scale_used_for_data_vars(ds)
+
+    specs = {k: _zarr_spec_from_path(os.path.join(path, k), zarr_format) for k in ds}
+    array_futures = {
+        k: tensorstore.open(spec, read=True, write=write, context=context)
+        for k, spec in specs.items()
+    }
+    arrays = {k: v.result() for k, v in array_futures.items()}
+    new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
+
+    return ds.copy(data=new_data)
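
The vendored module keeps the upstream public API (`open_zarr` and `read`), so downstream code only changes its import path. A minimal usage sketch, assuming a local Zarr store at a hypothetical path:

    import ocf_data_sampler.load.xarray_tensorstore as xts

    # Works for both zarr v2 and v3 stores; the format is detected via zarr.open()
    ds = xts.open_zarr("/data/satellite.zarr")  # hypothetical path

    # Indexing stays lazy; no array data is fetched yet
    example = ds.isel(time=slice(0, 12))

    # Kick off asynchronous TensorStore reads for all data variables, then block
    example = xts.read(example)
    numpy_example = example.compute()
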
--- a/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
+++ b/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py
@@ -21,7 +21,7 @@ from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
 from ocf_data_sampler.select import Location, fill_time_periods
 from ocf_data_sampler.select.geospatial import osgb_to_lon_lat
 from ocf_data_sampler.torch_datasets.utils import (
-    channel_dict_to_dataarray,
+    config_normalization_values_to_dicts,
     find_valid_time_periods,
     slice_datasets_by_space,
     slice_datasets_by_time,
@@ -110,11 +110,14 @@ class AbstractPVNetUKDataset(Dataset):
         self.config = config
         self.datasets_dict = datasets_dict
 
+        # Extract the normalisation values from the config for faster access
+        means_dict, stds_dict = config_normalization_values_to_dicts(config)
+        self.means_dict = means_dict
+        self.stds_dict = stds_dict
 
-    @staticmethod
     def process_and_combine_datasets(
+        self,
         dataset_dict: dict,
-        config: Configuration,
         t0: pd.Timestamp,
         location: Location,
     ) -> NumpySample:
@@ -122,7 +125,6 @@ class AbstractPVNetUKDataset(Dataset):
 
         Args:
             dataset_dict: Dictionary of xarray datasets
-            config: Configuration object
            t0: init-time for sample
             location: location of the sample
         """
@@ -134,13 +136,8 @@ class AbstractPVNetUKDataset(Dataset):
             for nwp_key, da_nwp in dataset_dict["nwp"].items():
 
                 # Standardise and convert to NumpyBatch
-
-                da_channel_means = channel_dict_to_dataarray(
-                    config.input_data.nwp[nwp_key].channel_means,
-                )
-                da_channel_stds = channel_dict_to_dataarray(
-                    config.input_data.nwp[nwp_key].channel_stds,
-                )
+                da_channel_means = self.means_dict["nwp"][nwp_key]
+                da_channel_stds = self.stds_dict["nwp"][nwp_key]
 
                 da_nwp = (da_nwp - da_channel_means) / da_channel_stds
 
@@ -153,15 +150,15 @@ class AbstractPVNetUKDataset(Dataset):
             da_sat = dataset_dict["sat"]
 
             # Standardise and convert to NumpyBatch
-            da_channel_means = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
-            da_channel_stds = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
+            da_channel_means = self.means_dict["sat"]
+            da_channel_stds = self.stds_dict["sat"]
 
             da_sat = (da_sat - da_channel_means) / da_channel_stds
 
             numpy_modalities.append(convert_satellite_to_numpy_sample(da_sat))
 
         if "gsp" in dataset_dict:
-            gsp_config = config.input_data.gsp
+            gsp_config = self.config.input_data.gsp
             da_gsp = dataset_dict["gsp"]
             da_gsp = da_gsp / da_gsp.effective_capacity_mwp
 
@@ -183,13 +180,8 @@ class AbstractPVNetUKDataset(Dataset):
         )
 
         # Only add solar position if explicitly configured
-        has_solar_config = (
-            hasattr(config.input_data, "solar_position") and
-            config.input_data.solar_position is not None
-        )
-
-        if has_solar_config:
-            solar_config = config.input_data.solar_position
+        if self.config.input_data.solar_position is not None:
+            solar_config = self.config.input_data.solar_position
 
             # Create datetime range for solar position calculation
             datetimes = pd.date_range(
@@ -264,7 +256,7 @@ class PVNetUKRegionalDataset(AbstractPVNetUKDataset):
         sample_dict = slice_datasets_by_time(sample_dict, t0, self.config)
         sample_dict = compute(sample_dict)
 
-        return self.process_and_combine_datasets(sample_dict, self.config, t0, location)
+        return self.process_and_combine_datasets(sample_dict, t0, location)
 
     @override
     def __getitem__(self, idx: int) -> NumpySample:
@@ -330,7 +322,6 @@ class PVNetUKConcurrentDataset(AbstractPVNetUKDataset):
         gsp_sample_dict = slice_datasets_by_space(sample_dict, location, self.config)
         gsp_numpy_sample = self.process_and_combine_datasets(
             gsp_sample_dict,
-            self.config,
             t0,
             location,
         )
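
The net effect on callers: `process_and_combine_datasets` is now an instance method that reads the config and the precomputed normalisation DataArrays from `self`, so per-sample calls no longer rebuild the channel mean/std DataArrays. A before/after sketch (the `dataset` and `sample_dict` names are illustrative):

    # 0.5.2: static method, normalisation DataArrays rebuilt from config on every call
    sample = dataset.process_and_combine_datasets(sample_dict, dataset.config, t0, location)

    # 0.5.5: instance method, reuses DataArrays built once in __init__
    sample = dataset.process_and_combine_datasets(sample_dict, t0, location)
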
--- a/ocf_data_sampler/torch_datasets/datasets/site.py
+++ b/ocf_data_sampler/torch_datasets/datasets/site.py
@@ -25,7 +25,7 @@ from ocf_data_sampler.select import (
     intersection_of_multiple_dataframes_of_periods,
 )
 from ocf_data_sampler.torch_datasets.utils import (
-    channel_dict_to_dataarray,
+    config_normalization_values_to_dicts,
     find_valid_time_periods,
     slice_datasets_by_space,
     slice_datasets_by_time,
@@ -62,6 +62,8 @@ def process_and_combine_datasets(
     dataset_dict: dict,
     config: Configuration,
     t0: pd.Timestamp,
+    means_dict: dict[str, xr.DataArray | dict[str, xr.DataArray]],
+    stds_dict: dict[str, xr.DataArray | dict[str, xr.DataArray]],
 ) -> NumpySample:
     """Normalise and convert data to numpy arrays.
 
@@ -69,6 +71,8 @@ def process_and_combine_datasets(
     Args:
         dataset_dict: Dictionary of xarray datasets
         config: Configuration object
         t0: init-time for sample
+        means_dict: Nested dictionary of mean values for the input data sources
+        stds_dict: Nested dictionary of std values for the input data sources
     """
     numpy_modalities = []
@@ -79,12 +83,8 @@ def process_and_combine_datasets(
 
             # Standardise and convert to NumpyBatch
 
-            da_channel_means = channel_dict_to_dataarray(
-                config.input_data.nwp[nwp_key].channel_means,
-            )
-            da_channel_stds = channel_dict_to_dataarray(
-                config.input_data.nwp[nwp_key].channel_stds,
-            )
+            da_channel_means = means_dict["nwp"][nwp_key]
+            da_channel_stds = stds_dict["nwp"][nwp_key]
 
             da_nwp = (da_nwp - da_channel_means) / da_channel_stds
 
@@ -97,8 +97,8 @@ def process_and_combine_datasets(
         da_sat = dataset_dict["sat"]
 
         # Standardise and convert to NumpyBatch
-        da_channel_means = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
-        da_channel_stds = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
+        da_channel_means = means_dict["sat"]
+        da_channel_stds = stds_dict["sat"]
 
         da_sat = (da_sat - da_channel_means) / da_channel_stds
 
@@ -109,11 +109,7 @@ def process_and_combine_datasets(
         da_sites = da_sites / da_sites.capacity_kwp
 
         # Convert to NumpyBatch
-        numpy_modalities.append(
-            convert_site_to_numpy_sample(
-                da_sites,
-            ),
-        )
+        numpy_modalities.append(convert_site_to_numpy_sample(da_sites))
 
     # add datetime features
     datetimes = pd.DatetimeIndex(da_sites.time_utc.values)
@@ -193,6 +189,11 @@ class SitesDataset(Dataset):
         # Assign coords and indices to self
         self.valid_t0_and_site_ids = valid_t0_and_site_ids
 
+        # Extract the normalisation values from the config for faster access
+        means_dict, stds_dict = config_normalization_values_to_dicts(config)
+        self.means_dict = means_dict
+        self.stds_dict = stds_dict
+
     def find_valid_t0_and_site_ids(
         self,
         datasets_dict: dict,
@@ -273,7 +274,13 @@ class SitesDataset(Dataset):
 
         sample_dict = compute(sample_dict)
 
-        return process_and_combine_datasets(sample_dict, self.config, t0)
+        return process_and_combine_datasets(
+            sample_dict,
+            self.config,
+            t0,
+            self.means_dict,
+            self.stds_dict,
+        )
 
     def get_sample(self, t0: pd.Timestamp, site_id: int) -> dict:
         """Generate a sample for a given site id and t0.
@@ -332,6 +339,11 @@ class SitesDatasetConcurrent(Dataset):
         # Assign coords and indices to self
         self.valid_t0s = valid_t0s
 
+        # Extract the normalisation values from the config for faster access
+        means_dict, stds_dict = config_normalization_values_to_dicts(config)
+        self.means_dict = means_dict
+        self.stds_dict = stds_dict
+
     def find_valid_t0s(
         self,
         datasets_dict: dict,
@@ -406,6 +418,8 @@ class SitesDatasetConcurrent(Dataset):
                 site_sample_dict,
                 self.config,
                 t0,
+                self.means_dict,
+                self.stds_dict,
             )
             site_samples.append(site_numpy_sample)
 
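
For the site datasets the function stays module-level, so the precomputed dictionaries are threaded through explicitly instead of being read from `self`. A sketch of the new call shape (the variable names are illustrative):

    means_dict, stds_dict = config_normalization_values_to_dicts(config)  # done once, in __init__
    sample = process_and_combine_datasets(sample_dict, config, t0, means_dict, stds_dict)
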
--- a/ocf_data_sampler/torch_datasets/utils/__init__.py
+++ b/ocf_data_sampler/torch_datasets/utils/__init__.py
@@ -1,4 +1,4 @@
-from .channel_dict_to_dataarray import channel_dict_to_dataarray
+from .config_normalization_values_to_dicts import config_normalization_values_to_dicts
 from .merge_and_fill_utils import fill_nans_in_arrays, merge_dicts
 from .valid_time_periods import find_valid_time_periods
 from .spatial_slice_for_dataset import slice_datasets_by_space
--- /dev/null
+++ b/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py
@@ -0,0 +1,57 @@
+"""Utility function for converting channel dictionaries to xarray DataArrays."""
+
+import xarray as xr
+
+from ocf_data_sampler.config import Configuration
+
+
+def channel_dict_to_dataarray(channel_dict: dict[str, float]) -> xr.DataArray:
+    """Converts a dictionary of channel values to a DataArray.
+
+    Args:
+        channel_dict: Dictionary mapping channel names (str) to their values (float).
+
+    Returns:
+        xr.DataArray: A 1D DataArray with channels as coordinates.
+    """
+    return xr.DataArray(
+        list(channel_dict.values()),
+        coords={"channel": list(channel_dict.keys())},
+    )
+
+def config_normalization_values_to_dicts(
+    config: Configuration,
+) -> tuple[dict[str, xr.DataArray | dict[str, xr.DataArray]]]:
+    """Construct DataArrays of mean and std values from the config normalisation constants.
+
+    Args:
+        config: Data configuration.
+
+    Returns:
+        Means dict
+        Stds dict
+    """
+    means_dict = {}
+    stds_dict = {}
+
+    if config.input_data.nwp is not None:
+
+        means_dict["nwp"] = {}
+        stds_dict["nwp"] = {}
+
+        for nwp_key in config.input_data.nwp:
+            # Standardise and convert to NumpyBatch
+
+            means_dict["nwp"][nwp_key] = channel_dict_to_dataarray(
+                config.input_data.nwp[nwp_key].channel_means,
+            )
+            stds_dict["nwp"][nwp_key] = channel_dict_to_dataarray(
+                config.input_data.nwp[nwp_key].channel_stds,
+            )
+
+    if config.input_data.satellite is not None:
+
+        means_dict["sat"] = channel_dict_to_dataarray(config.input_data.satellite.channel_means)
+        stds_dict["sat"] = channel_dict_to_dataarray(config.input_data.satellite.channel_stds)
+
+    return means_dict, stds_dict
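
A sketch of the shapes this helper produces, with illustrative channel names, values, and NWP key:

    da = channel_dict_to_dataarray({"t2m": 285.1, "dswrf": 294.6})
    # -> 1D xr.DataArray of length 2 with coordinate channel=["t2m", "dswrf"]

    means_dict, stds_dict = config_normalization_values_to_dicts(config)
    # means_dict has a nested shape (keys depend on the config), roughly:
    # {"nwp": {"ukv": <xr.DataArray (channel,)>, ...}, "sat": <xr.DataArray (channel,)>}

    # Normalisation then broadcasts over the channel dimension:
    da_norm = (da_nwp - means_dict["nwp"]["ukv"]) / stds_dict["nwp"]["ukv"]
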
--- a/ocf_data_sampler-0.5.2.dist-info/METADATA
+++ b/ocf_data_sampler-0.5.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ocf-data-sampler
-Version: 0.5.2
+Version: 0.5.5
 Author: James Fulton, Peter Dudfield
 Author-email: Open Climate Fix team <info@openclimatefix.org>
 License: MIT License
@@ -28,14 +28,14 @@ License: MIT License
 Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
-Requires-Python: >=3.10
+Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 Requires-Dist: torch
 Requires-Dist: numpy
 Requires-Dist: pandas
 Requires-Dist: xarray
 Requires-Dist: zarr
-Requires-Dist: numcodecs==0.13.1
+Requires-Dist: numcodecs
 Requires-Dist: dask
 Requires-Dist: matplotlib
 Requires-Dist: pvlib
@@ -44,7 +44,8 @@ Requires-Dist: pyproj
 Requires-Dist: pyaml_env
 Requires-Dist: pyresample
 Requires-Dist: h5netcdf
-Requires-Dist: xarray-tensorstore==0.1.5
+Requires-Dist: tensorstore
+Requires-Dist: zarr>=3
 
 # ocf-data-sampler
 
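
In short, the wheel no longer pins xarray-tensorstore==0.1.5 (the vendored copy replaces it) and instead depends on tensorstore and zarr>=3 directly; the numcodecs pin is also dropped and the Python floor moves to 3.11. Upgrading is just:

    pip install --upgrade "ocf-data-sampler==0.5.5"  # requires Python >=3.11
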
--- a/ocf_data_sampler-0.5.2.dist-info/RECORD
+++ b/ocf_data_sampler-0.5.5.dist-info/RECORD
@@ -9,10 +9,11 @@ ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKF
 ocf_data_sampler/load/__init__.py,sha256=-vQP9g0UOWdVbjEGyVX_ipa7R1btmiETIKAf6aw4d78,201
 ocf_data_sampler/load/gsp.py,sha256=d30jQWnwFaLj6rKNMHdz1qD8fzF8q--RNnEXT7bGiX0,2981
 ocf_data_sampler/load/load_dataset.py,sha256=K8rWykjII-3g127If7WRRFivzHNx3SshCvZj4uQlf28,2089
-ocf_data_sampler/load/open_tensorstore_zarrs.py,sha256=_RHWe0GmrBSA9s1TH5I9VCMPpeZEsuRuhDt5Vyyx5Fo,2725
-ocf_data_sampler/load/satellite.py,sha256=RylkJz8avxdM5pK_liaTlD1DTboyPMgykXJ4_Ek9WBA,1840
+ocf_data_sampler/load/open_tensorstore_zarrs.py,sha256=ElXmW7GhYDpsHZr7KjM-KIDNJMc4lmgzVIBwHx5Wl0Q,2748
+ocf_data_sampler/load/satellite.py,sha256=X5ZqFfMgab_WDwI7w1ZmdyMeh3GwV1g7mBd8tFgr8dM,1862
 ocf_data_sampler/load/site.py,sha256=WtOy20VMHJIY0IwEemCdcecSDUGcVaLUown-4ixJw90,2147
 ocf_data_sampler/load/utils.py,sha256=AGL0aOOQPrgqNBTjlBtR7Qg1PyQov3DFJo-y198u8pY,2044
+ocf_data_sampler/load/xarray_tensorstore.py,sha256=DSZl364Hn3QjcVxxPmBKU9rsc5BlJBdzL_SMrv-9os0,10997
 ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
 ocf_data_sampler/load/nwp/nwp.py,sha256=0E9shei3Mq1N7F-fBlEKY5Hm0_kI7ysY_rffnWIshvk,3612
 ocf_data_sampler/load/nwp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -21,7 +22,7 @@ ocf_data_sampler/load/nwp/providers/ecmwf.py,sha256=P7JqfssmQq8eHKKXaBexsxts325A
 ocf_data_sampler/load/nwp/providers/gfs.py,sha256=h6vm-Rfz1JGOE4P_fP1_XQJ3bugNbeNAIyt56N8B1Dc,1066
 ocf_data_sampler/load/nwp/providers/icon.py,sha256=iVZwLKRr_D74_kAu5MHir6pRKEfbTmIxFRZAxzmiYdI,1257
 ocf_data_sampler/load/nwp/providers/ukv.py,sha256=2i32VM9gnmWUpbL0qBSp_AKzuyKucXZPS8yklbcGlbc,1039
-ocf_data_sampler/load/nwp/providers/utils.py,sha256=cVwCiC8FqNpkZFSUGv1CRqIQlKdjx1sIsb2SIUlvWV8,2333
+ocf_data_sampler/load/nwp/providers/utils.py,sha256=5LrLmy74AVY5uLwL2qEhy-yPqSYLoxOgN8W1v8FmaQA,2355
 ocf_data_sampler/numpy_sample/__init__.py,sha256=5bdpzM8hMAEe0XRSZ9AZFQdqEeBsEPhaF79Y8bDx3GQ,407
 ocf_data_sampler/numpy_sample/collate.py,sha256=hoxIc5SoHoIs3Nx37aRZzWChpswjy9lHUgaKgHIoo80,2039
 ocf_data_sampler/numpy_sample/common_types.py,sha256=9CjYHkUTx0ObduWh43fhsybZCTXvexql7qC2ptMDoek,377
@@ -40,14 +41,14 @@ ocf_data_sampler/select/location.py,sha256=AZvGR8y62opiW7zACGXjoOtBEWRfSLOZIA73O
 ocf_data_sampler/select/select_spatial_slice.py,sha256=Hd4jGRUfIZRoWCirOQZeoLpaUnStB6KyFSTPX69wZLw,8790
 ocf_data_sampler/select/select_time_slice.py,sha256=HeHbwZ0CP03x0-LaJtpbSdtpLufwVTR73p6wH6O_PS8,5513
 ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=o0SsEXXZ6k9iL__5_RN1Sf60lw_eqK91P3UFEHAD2k0,102
-ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=v63goKEMI6UgBPnQCnIbxhFFdwuP_sxgcPYY6iNfGkc,12257
-ocf_data_sampler/torch_datasets/datasets/site.py,sha256=_0A2kRq8B5WL5zWjKxNY9snAl_GwptohUt7c6DDa2AA,14812
+ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=876oLukvb1nLtZQ8HBN3PWfN7urKH2xa45tVar7XrbM,12010
+ocf_data_sampler/torch_datasets/datasets/site.py,sha256=nn6N8daGxllYwCCiFKbCJANTl84NrDRl-nbNGcfXc3U,15429
 ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
 ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
 ocf_data_sampler/torch_datasets/sample/site.py,sha256=40NwNTqjL1WVhPdwe02zDHHfDLG2u_bvCfRCtGAtFc0,1466
 ocf_data_sampler/torch_datasets/sample/uk_regional.py,sha256=Xx5cBYUyaM6PGUWQ76MHT9hwj6IJ7WAOxbpmYFbJGhc,10483
-ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=N7i_hHtWUDiJqsiJoDx4T_QuiYOuvIyulPrn6xEA4TY,309
-ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py,sha256=un2IiyoAmTDIymdeMiPU899_86iCDMD-oIifjHlNyqw,555
+ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=_UHLL_yRzhLJVHi6ROSaSe8TGw80CAhU325uCZj7XkY,331
+ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py,sha256=jS3DkAwOF1W3AQnvsdkBJ1C8Unm93kQbS8hgTCtFv2A,1743
 ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
 ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
 ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=8E4a5v9dqr-sZOyBruuO-tjLPBbjtpYtdFY5z23aqnU,4365
@@ -56,7 +57,7 @@ ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul
 scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
 scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
 utils/compute_icon_mean_stddev.py,sha256=a1oWMRMnny39rV-dvu8rcx85sb4bXzPFrR1gkUr4Jpg,2296
-ocf_data_sampler-0.5.2.dist-info/METADATA,sha256=mYEZX1GRP6sJoaRs3B5DY5SAFUl1r4OqkgJYXemLzOM,12580
-ocf_data_sampler-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ocf_data_sampler-0.5.2.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
-ocf_data_sampler-0.5.2.dist-info/RECORD,,
+ocf_data_sampler-0.5.5.dist-info/METADATA,sha256=R9MPrxfVGCnkBbUehSjd3taDZxeREDo_YaIv5ccqnyg,12581
+ocf_data_sampler-0.5.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ocf_data_sampler-0.5.5.dist-info/top_level.txt,sha256=LEFU4Uk-PEo72QGLAfnVZIUEm37Q8mKuMeg_Xk-p33g,31
+ocf_data_sampler-0.5.5.dist-info/RECORD,,
--- a/ocf_data_sampler/torch_datasets/utils/channel_dict_to_dataarray.py
+++ /dev/null
@@ -1,18 +0,0 @@
-"""Utility function for converting channel dictionaries to xarray DataArrays."""
-
-import xarray as xr
-
-
-def channel_dict_to_dataarray(channel_dict: dict[str, float]) -> xr.DataArray:
-    """Converts a dictionary of channel values to a DataArray.
-
-    Args:
-        channel_dict: Dictionary mapping channel names (str) to their values (float).
-
-    Returns:
-        xr.DataArray: A 1D DataArray with channels as coordinates.
-    """
-    return xr.DataArray(
-        list(channel_dict.values()),
-        coords={"channel": list(channel_dict.keys())},
-    )