ocf-data-sampler 0.5.3__tar.gz → 0.5.6__tar.gz


Potentially problematic release.

Files changed (69)
  1. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/PKG-INFO +10 -3
  2. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/README.md +6 -0
  3. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/utils.py +1 -2
  4. ocf_data_sampler-0.5.6/ocf_data_sampler/load/open_xarray_tensorstore.py +167 -0
  5. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/satellite.py +1 -3
  6. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler.egg-info/PKG-INFO +10 -3
  7. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler.egg-info/SOURCES.txt +1 -1
  8. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler.egg-info/requires.txt +2 -1
  9. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/pyproject.toml +3 -2
  10. ocf_data_sampler-0.5.3/ocf_data_sampler/load/open_tensorstore_zarrs.py +0 -92
  11. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/LICENSE +0 -0
  12. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/__init__.py +0 -0
  13. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/config/__init__.py +0 -0
  14. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/config/load.py +0 -0
  15. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/config/model.py +0 -0
  16. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/config/save.py +0 -0
  17. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/data/uk_gsp_locations_20220314.csv +0 -0
  18. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/data/uk_gsp_locations_20250109.csv +0 -0
  19. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/__init__.py +0 -0
  20. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/gsp.py +0 -0
  21. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/load_dataset.py +0 -0
  22. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  23. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  24. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  25. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/cloudcasting.py +0 -0
  26. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  27. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/gfs.py +0 -0
  28. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/icon.py +0 -0
  29. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  30. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/site.py +0 -0
  31. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/load/utils.py +0 -0
  32. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  33. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  34. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/common_types.py +0 -0
  35. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  36. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  37. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  38. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  39. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/site.py +0 -0
  40. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  41. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/__init__.py +0 -0
  42. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/dropout.py +0 -0
  43. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  44. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  45. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/geospatial.py +0 -0
  46. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/location.py +0 -0
  47. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  48. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/select/select_time_slice.py +0 -0
  49. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  50. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +0 -0
  51. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  52. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/sample/__init__.py +0 -0
  53. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/sample/base.py +0 -0
  54. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/sample/site.py +0 -0
  55. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/sample/uk_regional.py +0 -0
  56. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/__init__.py +0 -0
  57. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py +0 -0
  58. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  59. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py +0 -0
  60. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py +0 -0
  61. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  62. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/torch_datasets/utils/validation_utils.py +0 -0
  63. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler/utils.py +0 -0
  64. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  65. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  66. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/scripts/download_gsp_location_data.py +0 -0
  67. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/scripts/refactor_site.py +0 -0
  68. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/setup.cfg +0 -0
  69. {ocf_data_sampler-0.5.3 → ocf_data_sampler-0.5.6}/utils/compute_icon_mean_stddev.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ocf-data-sampler
- Version: 0.5.3
+ Version: 0.5.6
  Author: James Fulton, Peter Dudfield
  Author-email: Open Climate Fix team <info@openclimatefix.org>
  License: MIT License
@@ -28,14 +28,14 @@ License: MIT License
  Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
- Requires-Python: >=3.10
+ Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  Requires-Dist: torch
  Requires-Dist: numpy
  Requires-Dist: pandas
  Requires-Dist: xarray
  Requires-Dist: zarr
- Requires-Dist: numcodecs==0.13.1
+ Requires-Dist: numcodecs
  Requires-Dist: dask
  Requires-Dist: matplotlib
  Requires-Dist: pvlib
@@ -45,6 +45,7 @@ Requires-Dist: pyaml_env
  Requires-Dist: pyresample
  Requires-Dist: h5netcdf
  Requires-Dist: xarray-tensorstore==0.1.5
+ Requires-Dist: zarr>=3

  # ocf-data-sampler

@@ -62,6 +63,12 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
  > [!Note]
  > This repository is still in early development and large changes to the user facing functions may still occur.

+ ## Licence
+
+ This project is primarily licensed under the MIT License (see LICENSE).
+
+ It includes and adapts internal functions from the Google xarray-tensorstore project, licensed under the Apache License, Version 2.0.
+
  ## Documentation

  **ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
README.md
@@ -14,6 +14,12 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
  > [!Note]
  > This repository is still in early development and large changes to the user facing functions may still occur.

+ ## Licence
+
+ This project is primarily licensed under the MIT License (see LICENSE).
+
+ It includes and adapts internal functions from the Google xarray-tensorstore project, licensed under the Apache License, Version 2.0.
+
  ## Documentation

  **ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
ocf_data_sampler/load/nwp/providers/utils.py
@@ -3,9 +3,8 @@
  from glob import glob

  import xarray as xr
- from xarray_tensorstore import open_zarr

- from ocf_data_sampler.load.open_tensorstore_zarrs import open_zarrs
+ from ocf_data_sampler.load.open_xarray_tensorstore import open_zarr, open_zarrs


  def open_zarr_paths(
ocf_data_sampler-0.5.6/ocf_data_sampler/load/open_xarray_tensorstore.py (new file)
@@ -0,0 +1,167 @@
+ """Utilities for loading TensorStore data into Xarray.
+
+ This module uses and adapts internal functions from the Google xarray-tensorstore project [1],
+ licensed under the Apache License, Version 2.0. See [2] for details.
+
+ Modifications copyright 2025 Open Climate Fix. Licensed under the MIT License.
+
+ Modifications from the original include:
+ - Adding support for opening multiple zarr files as a single xarray object
+ - Support for zarr 3 -> https://github.com/google/xarray-tensorstore/pull/22
+
+ References:
+ [1] https://github.com/google-research/tensorstore/blob/main/tensorstore/xarray.py
+ [2] https://www.apache.org/licenses/LICENSE-2.0
+ """
+
+ import os.path
+ import re
+
+ import tensorstore as ts
+ import xarray as xr
+ import zarr
+ from xarray_tensorstore import (
+     _DEFAULT_STORAGE_DRIVER,
+     _raise_if_mask_and_scale_used_for_data_vars,
+     _TensorStoreAdapter,
+ )
+
+
+ def _zarr_spec_from_path(path: str, zarr_format: int) -> ...:
+     if re.match(r"\w+\://", path):  # path is a URI
+         kv_store = path
+     else:
+         kv_store = {"driver": _DEFAULT_STORAGE_DRIVER, "path": path}
+     return {"driver": f"zarr{zarr_format}", "kvstore": kv_store}
+
+
+ def _get_data_variable_array_futures(
+     path: str,
+     context: ts.Context | None,
+     variables: list[str],
+ ) -> dict[str, ts.Future]:
+     """Open all data variables in a zarr group and return futures.
+
+     Args:
+         path: path or URI to zarr group to open.
+         context: TensorStore configuration options to use when opening arrays.
+         variables: The variables in the zarr group to open.
+     """
+     zarr_format = zarr.open(path).metadata.zarr_format
+     specs = {k: _zarr_spec_from_path(os.path.join(path, k), zarr_format) for k in variables}
+     return {k: ts.open(spec, read=True, write=False, context=context) for k, spec in specs.items()}
+
+
+ def _tensorstore_open_zarrs(
+     paths: list[str],
+     data_vars: list[str],
+     concat_axes: list[int],
+     context: ts.Context,
+ ) -> dict[str, ts.TensorStore]:
+     """Open multiple zarrs with TensorStore.
+
+     Args:
+         paths: List of paths to zarr stores.
+         data_vars: List of data variable names to open.
+         concat_axes: List of axes along which to concatenate the data variables.
+         context: TensorStore context.
+     """
+     # Open all the variables from all the datasets - returned as futures
+     arrays_list: list[dict[str, ts.Future]] = []
+     for path in paths:
+         arrays_list.append(_get_data_variable_array_futures(path, context, data_vars))
+
+     # Wait for the async open operations
+     arrays_list = [{k: v.result() for k, v in arrays.items()} for arrays in arrays_list]
+
+     # Concatenate each of the variables along the required axis
+     arrays = {}
+     for k, axis in zip(data_vars, concat_axes, strict=True):
+         variable_arrays = [d[k] for d in arrays_list]
+         arrays[k] = ts.concat(variable_arrays, axis=axis)
+
+     return arrays
+
+
+ def open_zarr(
+     path: str,
+     context: ts.Context | None = None,
+     mask_and_scale: bool = True,
+ ) -> xr.Dataset:
+     """Open an xarray.Dataset from zarr using TensorStore.
+
+     Args:
+         path: path or URI to zarr group to open.
+         context: TensorStore configuration options to use when opening arrays.
+         mask_and_scale: if True (default), attempt to apply masking and scaling like
+             xarray.open_zarr(). This is only supported for coordinate variables and
+             otherwise will raise an error.
+
+     Returns:
+         Dataset with all data variables opened via TensorStore.
+     """
+     if context is None:
+         context = ts.Context()
+
+     # Avoid using dask by setting `chunks=None`
+     ds = xr.open_zarr(path, chunks=None, mask_and_scale=mask_and_scale)
+
+     if mask_and_scale:
+         _raise_if_mask_and_scale_used_for_data_vars(ds)
+
+     # Open all data variables using tensorstore - returned as futures
+     data_vars = list(ds.data_vars)
+     arrays = _get_data_variable_array_futures(path, context, data_vars)
+
+     # Wait for the async open operations
+     arrays = {k: v.result() for k, v in arrays.items()}
+
+     # Adapt the tensorstore arrays and plug them into the xarray object
+     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
+
+     return ds.copy(data=new_data)
+
+
+ def open_zarrs(
+     paths: list[str],
+     concat_dim: str,
+     context: ts.Context | None = None,
+     mask_and_scale: bool = True,
+ ) -> xr.Dataset:
+     """Open multiple zarrs with TensorStore.
+
+     Args:
+         paths: List of paths to zarr stores.
+         concat_dim: Dimension along which to concatenate the data variables.
+         context: TensorStore context.
+         mask_and_scale: Whether to mask and scale the data.
+
+     Returns:
+         Concatenated Dataset with all data variables opened via TensorStore.
+     """
+     if context is None:
+         context = ts.Context()
+
+     ds_list = [xr.open_zarr(p, mask_and_scale=mask_and_scale, decode_timedelta=True) for p in paths]
+     ds = xr.concat(
+         ds_list,
+         dim=concat_dim,
+         data_vars="minimal",
+         compat="equals",
+         combine_attrs="no_conflicts",
+     )
+
+     if mask_and_scale:
+         _raise_if_mask_and_scale_used_for_data_vars(ds)
+
+     # Find the axis along which each data array must be concatenated
+     data_vars = list(ds.data_vars)
+     concat_axes = [ds[v].dims.index(concat_dim) for v in data_vars]
+
+     # Open and concat all zarrs so each variable is a single TensorStore array
+     arrays = _tensorstore_open_zarrs(paths, data_vars, concat_axes, context)
+
+     # Plug the arrays into the xarray object
+     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
+
+     return ds.copy(data=new_data)
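
The two public entry points above replace the previous `open_tensorstore_zarrs.open_zarrs` helper and the direct `xarray_tensorstore.open_zarr` import. A minimal usage sketch follows; the store paths and the `time` concat dimension are hypothetical, not taken from the diff:

import tensorstore as ts

from ocf_data_sampler.load.open_xarray_tensorstore import open_zarr, open_zarrs

# Single store: data variables end up backed by TensorStore rather than dask
ds = open_zarr("data/sat_2020.zarr")

# Several stores concatenated along a shared dimension; each data variable
# becomes one ts.concat-ed TensorStore array plugged into the Dataset
ds_multi = open_zarrs(
    ["data/sat_2020.zarr", "data/sat_2021.zarr"],
    concat_dim="time",
    context=ts.Context(),
)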
ocf_data_sampler/load/satellite.py
@@ -1,16 +1,14 @@
  """Satellite loader."""
  import numpy as np
  import xarray as xr
- from xarray_tensorstore import open_zarr

+ from ocf_data_sampler.load.open_xarray_tensorstore import open_zarr, open_zarrs
  from ocf_data_sampler.load.utils import (
      check_time_unique_increasing,
      get_xr_data_array_from_xr_dataset,
      make_spatial_coords_increasing,
  )

- from .open_tensorstore_zarrs import open_zarrs
-

  def open_sat_data(zarr_path: str | list[str]) -> xr.DataArray:
      """Lazily opens the zarr store and validates data types.
ocf_data_sampler.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ocf-data-sampler
- Version: 0.5.3
+ Version: 0.5.6
  Author: James Fulton, Peter Dudfield
  Author-email: Open Climate Fix team <info@openclimatefix.org>
  License: MIT License
@@ -28,14 +28,14 @@ License: MIT License
  Project-URL: repository, https://github.com/openclimatefix/ocf-data-sampler
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
- Requires-Python: >=3.10
+ Requires-Python: >=3.11
  Description-Content-Type: text/markdown
  Requires-Dist: torch
  Requires-Dist: numpy
  Requires-Dist: pandas
  Requires-Dist: xarray
  Requires-Dist: zarr
- Requires-Dist: numcodecs==0.13.1
+ Requires-Dist: numcodecs
  Requires-Dist: dask
  Requires-Dist: matplotlib
  Requires-Dist: pvlib
@@ -45,6 +45,7 @@ Requires-Dist: pyaml_env
  Requires-Dist: pyresample
  Requires-Dist: h5netcdf
  Requires-Dist: xarray-tensorstore==0.1.5
+ Requires-Dist: zarr>=3

  # ocf-data-sampler

@@ -62,6 +63,12 @@ We are currently migrating to this repo from [ocf_datapipes](https://github.com/
  > [!Note]
  > This repository is still in early development and large changes to the user facing functions may still occur.

+ ## Licence
+
+ This project is primarily licensed under the MIT License (see LICENSE).
+
+ It includes and adapts internal functions from the Google xarray-tensorstore project, licensed under the Apache License, Version 2.0.
+
  ## Documentation

  **ocf-data-sampler** doesn't have external documentation _yet_; you can read a bit about how our torch datasets work in the README [here](ocf_data_sampler/torch_datasets/README.md).
ocf_data_sampler.egg-info/SOURCES.txt
@@ -17,7 +17,7 @@ ocf_data_sampler/data/uk_gsp_locations_20250109.csv
  ocf_data_sampler/load/__init__.py
  ocf_data_sampler/load/gsp.py
  ocf_data_sampler/load/load_dataset.py
- ocf_data_sampler/load/open_tensorstore_zarrs.py
+ ocf_data_sampler/load/open_xarray_tensorstore.py
  ocf_data_sampler/load/satellite.py
  ocf_data_sampler/load/site.py
  ocf_data_sampler/load/utils.py
ocf_data_sampler.egg-info/requires.txt
@@ -3,7 +3,7 @@ numpy
  pandas
  xarray
  zarr
- numcodecs==0.13.1
+ numcodecs
  dask
  matplotlib
  pvlib
@@ -13,3 +13,4 @@ pyaml_env
  pyresample
  h5netcdf
  xarray-tensorstore==0.1.5
+ zarr>=3
pyproject.toml
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
  name = "ocf-data-sampler"
  dynamic = ["version"] # Set automatically using git: https://setuptools-git-versioning.readthedocs.io/en/stable/
  readme = { file = "README.md", content-type = "text/markdown" }
- requires-python = ">=3.10"
+ requires-python = ">=3.11"
  license = { file = "LICENSE" }
  authors = [
      { name = "Open Climate Fix team", email = "info@openclimatefix.org" },
@@ -26,7 +26,7 @@ dependencies = [
      "pandas",
      "xarray",
      "zarr",
-     "numcodecs==0.13.1",
+     "numcodecs",
      "dask",
      "matplotlib",
      "pvlib",
@@ -36,6 +36,7 @@ dependencies = [
      "pyresample",
      "h5netcdf",
      "xarray-tensorstore==0.1.5",
+     "zarr>=3",
  ]

  [dependency-groups]
ocf_data_sampler-0.5.3/ocf_data_sampler/load/open_tensorstore_zarrs.py (deleted)
@@ -1,92 +0,0 @@
- """Open multiple zarrs with TensorStore.
-
- This extends the functionality of xarray_tensorstore to open multiple zarr stores
- """
-
- import os
-
- import tensorstore as ts
- import xarray as xr
- from xarray_tensorstore import (
-     _raise_if_mask_and_scale_used_for_data_vars,
-     _TensorStoreAdapter,
-     _zarr_spec_from_path,
- )
-
-
- def tensorstore_open_multi_zarrs(
-     paths: list[str],
-     data_vars: list[str],
-     concat_axes: list[int],
-     context: ts.Context,
-     write: bool,
- ) -> dict[str, ts.TensorStore]:
-     """Open multiple zarrs with TensorStore.
-
-     Args:
-         paths: List of paths to zarr stores.
-         data_vars: List of data variable names to open.
-         concat_axes: List of axes along which to concatenate the data variables.
-         context: TensorStore context.
-         write: Whether to open the stores for writing.
-     """
-     arrays_list = []
-     for path in paths:
-         specs = {k: _zarr_spec_from_path(os.path.join(path, k)) for k in data_vars}
-         array_futures = {
-             k: ts.open(spec, read=True, write=write, context=context)
-             for k, spec in specs.items()
-         }
-         arrays_list.append({k: v.result() for k, v in array_futures.items()})
-
-     arrays = {}
-     for k, axis in zip(data_vars, concat_axes, strict=False):
-         datasets = [d[k] for d in arrays_list]
-         arrays[k] = ts.concat(datasets, axis=axis)
-
-     return arrays
-
-
- def open_zarrs(
-     paths: list[str],
-     concat_dim: str,
-     *,
-     context: ts.Context | None = None,
-     mask_and_scale: bool = True,
-     write: bool = False,
- ) -> xr.Dataset:
-     """Open multiple zarrs with TensorStore.
-
-     Args:
-         paths: List of paths to zarr stores.
-         concat_dim: Dimension along which to concatenate the data variables.
-         context: TensorStore context.
-         mask_and_scale: Whether to mask and scale the data.
-         write: Whether to open the stores for writing.
-     """
-     if context is None:
-         context = ts.Context()
-
-     ds = xr.open_mfdataset(
-         paths,
-         concat_dim=concat_dim,
-         combine="nested",
-         mask_and_scale=mask_and_scale,
-         decode_timedelta=True,
-     )
-
-     if mask_and_scale:
-         # Data variables get replaced below with _TensorStoreAdapter arrays, which
-         # don't get masked or scaled. Raising an error avoids surprising users with
-         # incorrect data values.
-         _raise_if_mask_and_scale_used_for_data_vars(ds)
-
-     data_vars = list(ds.data_vars)
-
-     concat_axes = [ds[v].dims.index(concat_dim) for v in data_vars]
-
-     arrays = tensorstore_open_multi_zarrs(paths, data_vars, concat_axes, context, write)
-
-     new_data = {k: _TensorStoreAdapter(v) for k, v in arrays.items()}
-
-     return ds.copy(data=new_data)