ocf-data-sampler 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -12,15 +12,20 @@ from ocf_data_sampler.load.utils import (
12
12
 
13
13
 
14
14
  def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
15
- """Helper function to open a zarr from either local or GCP path.
16
-
17
- The local or GCP path may contain wildcard matching (*)
15
+ """Helper function to open a Zarr from either a local or GCP path.
18
16
 
19
17
  Args:
20
- zarr_path: Path to zarr file
18
+ zarr_path: Path to a Zarr file. Wildcards (*) are supported **only** for local paths.
19
+ GCS paths (gs://) **do not support** wildcards.
20
+
21
+ Returns:
22
+ An xarray Dataset containing satellite data.
23
+
24
+ Raises:
25
+ ValueError: If a wildcard (*) is used in a GCS (gs://) path.
21
26
  """
22
27
 
23
- # These kwargs are used if zarr path contains "*"
28
+ # These kwargs are used if the path contains "*"
24
29
  openmf_kwargs = dict(
25
30
  engine="zarr",
26
31
  concat_dim="time",
@@ -29,19 +34,17 @@ def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
29
34
  join="override",
30
35
  )
31
36
 
32
- # Need to generate list of files if using GCP bucket storage
37
+ # Raise an error if a wildcard is used in a GCP path
33
38
  if "gs://" in str(zarr_path) and "*" in str(zarr_path):
34
- result_string = subprocess.run(
35
- f"gsutil ls -d {zarr_path}".split(" "), stdout=subprocess.PIPE
36
- ).stdout.decode("utf-8")
37
- files = result_string.splitlines()
38
-
39
- ds = xr.open_mfdataset(files, **openmf_kwargs)
39
+ raise ValueError("Wildcard (*) paths are not supported for GCP (gs://) URLs.")
40
40
 
41
- elif "*" in str(zarr_path): # Multi-file dataset
41
+ # Handle multi-file dataset for local paths
42
+ if "*" in str(zarr_path):
42
43
  ds = xr.open_mfdataset(zarr_path, **openmf_kwargs)
43
44
  else:
44
45
  ds = xr.open_dataset(zarr_path, engine="zarr", chunks="auto")
46
+
47
+ # Ensure time is unique and sorted
45
48
  ds = ds.drop_duplicates("time").sortby("time")
46
49
 
47
50
  return ds
@@ -53,24 +56,6 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
53
56
  Args:
54
57
  zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL, it must start with
55
58
  'gs://'.
56
-
57
- Example:
58
- With wild cards and GCS path:
59
- ```
60
- zarr_paths = [
61
- "gs://bucket/2020_nonhrv_split_*.zarr",
62
- "gs://bucket/2019_nonhrv_split_*.zarr",
63
- ]
64
- ds = open_sat_data(zarr_paths)
65
- ```
66
- Without wild cards and with local path:
67
- ```
68
- zarr_paths = [
69
- "/data/2020_nonhrv.zarr",
70
- "/data/2019_nonhrv.zarr",
71
- ]
72
- ds = open_sat_data(zarr_paths)
73
- ```
74
59
  """
75
60
 
76
61
  # Open the data
@@ -84,7 +69,7 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
84
69
  else:
85
70
  ds = _get_single_sat_data(zarr_path)
86
71
 
87
- # Rename
72
+ # Rename dimensions
88
73
  ds = ds.rename(
89
74
  {
90
75
  "variable": "channel",
@@ -92,13 +77,13 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
92
77
  }
93
78
  )
94
79
 
95
- # Check the timestamps are unique and increasing
80
+ # Check timestamps
96
81
  check_time_unique_increasing(ds.time_utc)
97
82
 
98
- # Make sure the spatial coords are in increasing order
83
+ # Ensure spatial coordinates are sorted
99
84
  ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
100
85
 
101
86
  ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
102
-
103
87
  # TODO: should we control the dtype of the DataArray?
88
+
104
89
  return get_xr_data_array_from_xr_dataset(ds)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -9,7 +9,7 @@ ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZt
9
9
  ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
10
10
  ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
11
11
  ocf_data_sampler/load/load_dataset.py,sha256=Ua3RaUg4PIYJkD9BKqTfN8IWUbezbhThJGgEkd9PcaE,1587
12
- ocf_data_sampler/load/satellite.py,sha256=3KlA1fx4SwxdzM-jC1WRaONXO0D6m0WxORnEnwUnZrA,2967
12
+ ocf_data_sampler/load/satellite.py,sha256=f2Q7FSyySOf7DeHxcigHd-vk-J-U4S2pXg_CnhnhuwU,2571
13
13
  ocf_data_sampler/load/site.py,sha256=P83uz01WBDzoZajdOH0m8FQt4-buKDlUk19N548KqhA,1086
14
14
  ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
15
15
  ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
@@ -72,8 +72,8 @@ tests/test_sample/test_uk_regional_sample.py,sha256=gkeQWC2wC757jKJz_QBmDMFQjn3R
72
72
  tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
73
73
  tests/torch_datasets/test_pvnet_uk.py,sha256=OzT9ArdnWPa3iJKggxc2-7npkDqWmZyS5pzM4M08NZU,5566
74
74
  tests/torch_datasets/test_site.py,sha256=5MH5zkHFJXekwpnV6nHuSxt_sRNu9_mxiUjfWqmEhr0,6966
75
- ocf_data_sampler-0.1.3.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
76
- ocf_data_sampler-0.1.3.dist-info/METADATA,sha256=c5LEfePIqtFvxzWabUufEZkAhIZqp8ep-cHLUH61zAU,12173
77
- ocf_data_sampler-0.1.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
78
- ocf_data_sampler-0.1.3.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
79
- ocf_data_sampler-0.1.3.dist-info/RECORD,,
75
+ ocf_data_sampler-0.1.4.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
76
+ ocf_data_sampler-0.1.4.dist-info/METADATA,sha256=hsPFFitpSgAL9VsZXEgLOihY8XZsC82NBmdf7E9Ty-I,12173
77
+ ocf_data_sampler-0.1.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
78
+ ocf_data_sampler-0.1.4.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
79
+ ocf_data_sampler-0.1.4.dist-info/RECORD,,