ocf-data-sampler 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- ocf_data_sampler/load/satellite.py +20 -35
- {ocf_data_sampler-0.1.3.dist-info → ocf_data_sampler-0.1.4.dist-info}/METADATA +1 -1
- {ocf_data_sampler-0.1.3.dist-info → ocf_data_sampler-0.1.4.dist-info}/RECORD +6 -6
- {ocf_data_sampler-0.1.3.dist-info → ocf_data_sampler-0.1.4.dist-info}/LICENSE +0 -0
- {ocf_data_sampler-0.1.3.dist-info → ocf_data_sampler-0.1.4.dist-info}/WHEEL +0 -0
- {ocf_data_sampler-0.1.3.dist-info → ocf_data_sampler-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -12,15 +12,20 @@ from ocf_data_sampler.load.utils import (
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
|
|
15
|
-
"""Helper function to open a
|
|
16
|
-
|
|
17
|
-
The local or GCP path may contain wildcard matching (*)
|
|
15
|
+
"""Helper function to open a Zarr from either a local or GCP path.
|
|
18
16
|
|
|
19
17
|
Args:
|
|
20
|
-
zarr_path: Path to
|
|
18
|
+
zarr_path: Path to a Zarr file. Wildcards (*) are supported **only** for local paths.
|
|
19
|
+
GCS paths (gs://) **do not support** wildcards.
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
An xarray Dataset containing satellite data.
|
|
23
|
+
|
|
24
|
+
Raises:
|
|
25
|
+
ValueError: If a wildcard (*) is used in a GCS (gs://) path.
|
|
21
26
|
"""
|
|
22
27
|
|
|
23
|
-
# These kwargs are used if
|
|
28
|
+
# These kwargs are used if the path contains "*"
|
|
24
29
|
openmf_kwargs = dict(
|
|
25
30
|
engine="zarr",
|
|
26
31
|
concat_dim="time",
|
|
@@ -29,19 +34,17 @@ def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
|
|
|
29
34
|
join="override",
|
|
30
35
|
)
|
|
31
36
|
|
|
32
|
-
#
|
|
37
|
+
# Raise an error if a wildcard is used in a GCP path
|
|
33
38
|
if "gs://" in str(zarr_path) and "*" in str(zarr_path):
|
|
34
|
-
|
|
35
|
-
f"gsutil ls -d {zarr_path}".split(" "), stdout=subprocess.PIPE
|
|
36
|
-
).stdout.decode("utf-8")
|
|
37
|
-
files = result_string.splitlines()
|
|
38
|
-
|
|
39
|
-
ds = xr.open_mfdataset(files, **openmf_kwargs)
|
|
39
|
+
raise ValueError("Wildcard (*) paths are not supported for GCP (gs://) URLs.")
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
# Handle multi-file dataset for local paths
|
|
42
|
+
if "*" in str(zarr_path):
|
|
42
43
|
ds = xr.open_mfdataset(zarr_path, **openmf_kwargs)
|
|
43
44
|
else:
|
|
44
45
|
ds = xr.open_dataset(zarr_path, engine="zarr", chunks="auto")
|
|
46
|
+
|
|
47
|
+
# Ensure time is unique and sorted
|
|
45
48
|
ds = ds.drop_duplicates("time").sortby("time")
|
|
46
49
|
|
|
47
50
|
return ds
|
|
@@ -53,24 +56,6 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
|
|
|
53
56
|
Args:
|
|
54
57
|
zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL, it must start with
|
|
55
58
|
'gs://'.
|
|
56
|
-
|
|
57
|
-
Example:
|
|
58
|
-
With wild cards and GCS path:
|
|
59
|
-
```
|
|
60
|
-
zarr_paths = [
|
|
61
|
-
"gs://bucket/2020_nonhrv_split_*.zarr",
|
|
62
|
-
"gs://bucket/2019_nonhrv_split_*.zarr",
|
|
63
|
-
]
|
|
64
|
-
ds = open_sat_data(zarr_paths)
|
|
65
|
-
```
|
|
66
|
-
Without wild cards and with local path:
|
|
67
|
-
```
|
|
68
|
-
zarr_paths = [
|
|
69
|
-
"/data/2020_nonhrv.zarr",
|
|
70
|
-
"/data/2019_nonhrv.zarr",
|
|
71
|
-
]
|
|
72
|
-
ds = open_sat_data(zarr_paths)
|
|
73
|
-
```
|
|
74
59
|
"""
|
|
75
60
|
|
|
76
61
|
# Open the data
|
|
@@ -84,7 +69,7 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
|
|
|
84
69
|
else:
|
|
85
70
|
ds = _get_single_sat_data(zarr_path)
|
|
86
71
|
|
|
87
|
-
# Rename
|
|
72
|
+
# Rename dimensions
|
|
88
73
|
ds = ds.rename(
|
|
89
74
|
{
|
|
90
75
|
"variable": "channel",
|
|
@@ -92,13 +77,13 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
|
|
|
92
77
|
}
|
|
93
78
|
)
|
|
94
79
|
|
|
95
|
-
# Check
|
|
80
|
+
# Check timestamps
|
|
96
81
|
check_time_unique_increasing(ds.time_utc)
|
|
97
82
|
|
|
98
|
-
#
|
|
83
|
+
# Ensure spatial coordinates are sorted
|
|
99
84
|
ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
|
|
100
85
|
|
|
101
86
|
ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
|
|
102
|
-
|
|
103
87
|
# TODO: should we control the dtype of the DataArray?
|
|
88
|
+
|
|
104
89
|
return get_xr_data_array_from_xr_dataset(ds)
|
|
@@ -9,7 +9,7 @@ ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZt
|
|
|
9
9
|
ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
|
|
10
10
|
ocf_data_sampler/load/gsp.py,sha256=Gcr1JVUOPKhFRDCSHtfPDjxx0BtyyEhXrZvGEKLPJ5I,759
|
|
11
11
|
ocf_data_sampler/load/load_dataset.py,sha256=Ua3RaUg4PIYJkD9BKqTfN8IWUbezbhThJGgEkd9PcaE,1587
|
|
12
|
-
ocf_data_sampler/load/satellite.py,sha256=
|
|
12
|
+
ocf_data_sampler/load/satellite.py,sha256=f2Q7FSyySOf7DeHxcigHd-vk-J-U4S2pXg_CnhnhuwU,2571
|
|
13
13
|
ocf_data_sampler/load/site.py,sha256=P83uz01WBDzoZajdOH0m8FQt4-buKDlUk19N548KqhA,1086
|
|
14
14
|
ocf_data_sampler/load/utils.py,sha256=sAEkPMS9LXVCrc5pANQo97zaoEItVg9hoNj2ZWfx_Ug,1405
|
|
15
15
|
ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
|
|
@@ -72,8 +72,8 @@ tests/test_sample/test_uk_regional_sample.py,sha256=gkeQWC2wC757jKJz_QBmDMFQjn3R
|
|
|
72
72
|
tests/torch_datasets/test_merge_and_fill_utils.py,sha256=GtuQg82BM1eHQjT7Ik1x1zaVcuc7KJO4_NC9stXsd4s,1123
|
|
73
73
|
tests/torch_datasets/test_pvnet_uk.py,sha256=OzT9ArdnWPa3iJKggxc2-7npkDqWmZyS5pzM4M08NZU,5566
|
|
74
74
|
tests/torch_datasets/test_site.py,sha256=5MH5zkHFJXekwpnV6nHuSxt_sRNu9_mxiUjfWqmEhr0,6966
|
|
75
|
-
ocf_data_sampler-0.1.
|
|
76
|
-
ocf_data_sampler-0.1.
|
|
77
|
-
ocf_data_sampler-0.1.
|
|
78
|
-
ocf_data_sampler-0.1.
|
|
79
|
-
ocf_data_sampler-0.1.
|
|
75
|
+
ocf_data_sampler-0.1.4.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
|
|
76
|
+
ocf_data_sampler-0.1.4.dist-info/METADATA,sha256=hsPFFitpSgAL9VsZXEgLOihY8XZsC82NBmdf7E9Ty-I,12173
|
|
77
|
+
ocf_data_sampler-0.1.4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
78
|
+
ocf_data_sampler-0.1.4.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
|
|
79
|
+
ocf_data_sampler-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|