ocf-data-sampler 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -0,0 +1 @@
1
+ from ocf_blosc2 import Blosc2 # noqa: F401
@@ -0,0 +1,33 @@
1
+ from pathlib import Path
2
+ import pkg_resources
3
+
4
+ import pandas as pd
5
+ import xarray as xr
6
+
7
+
8
def open_gsp(zarr_path: str | Path) -> xr.DataArray:
    """Open the GSP generation data and attach location and capacity coordinates.

    Args:
        zarr_path: Path to the GSP generation zarr store

    Returns:
        The `generation_mw` variable of the opened dataset, with `datetime_gmt` renamed to
        `time_utc` and with `x_osgb`, `y_osgb`, `nominal_capacity_mwp` and
        `effective_capacity_mwp` assigned as coordinates
    """
    # Load GSP generation xr.Dataset
    ds = xr.open_zarr(zarr_path)

    # Rename to standard time name
    ds = ds.rename({"datetime_gmt": "time_utc"})

    # Locate the bundled CSV relative to this module. This avoids handing pkg_resources a
    # resource path containing "..", which pkg_resources deprecates.
    gsp_locations_csv = Path(__file__).resolve().parent.parent / "data" / "uk_gsp_locations.csv"

    # Load UK GSP locations
    df_gsp_loc = pd.read_csv(gsp_locations_csv, index_col="gsp_id")

    # Add coordinates
    ds = ds.assign_coords(
        x_osgb=df_gsp_loc.x_osgb.to_xarray(),
        y_osgb=df_gsp_loc.y_osgb.to_xarray(),
        nominal_capacity_mwp=ds.installedcapacity_mwp,
        effective_capacity_mwp=ds.capacity_mwp,
    )

    return ds.generation_mw
32
+
33
+
@@ -0,0 +1 @@
1
+ from .nwp import open_nwp
@@ -0,0 +1,23 @@
1
+ from pathlib import Path
2
+ import xarray as xr
3
+
4
+ from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
5
+ from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs
6
+
7
+
8
def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> xr.DataArray:
    """Open NWP zarr data using the loader that matches the given provider.

    Args:
        zarr_path: Path, or list of paths, to the zarr store(s). Passed unchanged to the
            provider-specific loader
        provider: Name of the NWP provider, matched case-insensitively. The recognised
            values are "ukv" and "ecmwf"

    Returns:
        Whatever the selected provider-specific loader returns

    Raises:
        ValueError: If the provider is not one of the recognised values
    """
    provider_key = provider.lower()

    if provider_key == "ukv":
        return open_ukv(zarr_path)
    if provider_key == "ecmwf":
        return open_ifs(zarr_path)

    raise ValueError(f"Unknown provider: {provider}")
23
+
@@ -0,0 +1,101 @@
1
+ """Satellite loader"""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ import xarray as xr
8
+ from ocf_data_sampler.load.utils import check_time_unique_increasing, make_spatial_coords_increasing
9
+
10
+
11
def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
    """Helper function to open a zarr from either a local or GCP path.

    The local or GCP path may contain wildcard matching (*). A path containing a wildcard
    is opened as a multi-file dataset concatenated along "time".

    Args:
        zarr_path: Path to zarr file

    Returns:
        The opened dataset with duplicated "time" values dropped and sorted by "time"

    Raises:
        FileNotFoundError: If a GCP wildcard path matches no zarr stores
    """
    path_str = str(zarr_path)

    # These kwargs are used if zarr path contains "*"
    openmf_kwargs = dict(
        engine="zarr",
        concat_dim="time",
        combine="nested",
        chunks="auto",
        join="override",
    )

    # Need to generate list of files if using GCP bucket storage
    if "gs://" in path_str and "*" in path_str:
        # Pass the arguments as a list so a path containing spaces stays one argument
        listing = subprocess.run(["gsutil", "ls", "-d", path_str], stdout=subprocess.PIPE)
        files = listing.stdout.decode("utf-8").splitlines()

        # Fail with a clear message instead of passing an empty list to xarray
        if not files:
            raise FileNotFoundError(f"No zarr stores found matching {zarr_path}")

        ds = xr.open_mfdataset(files, **openmf_kwargs)

    elif "*" in path_str:  # Multi-file dataset
        ds = xr.open_mfdataset(zarr_path, **openmf_kwargs)
    else:
        ds = xr.open_dataset(zarr_path, engine="zarr", chunks="auto")

    ds = ds.drop_duplicates("time").sortby("time")

    return ds
45
+
46
+
47
def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArray:
    """Open one or more satellite Zarr stores and return the `data` variable.

    Args:
        zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL, it must start with
            'gs://'.

    Returns:
        The `data` variable of the opened dataset, with "variable" renamed to "channel" and
        "time" renamed to "time_utc", and with dimensions ordered as
        (time_utc, channel, x_geostationary, y_geostationary)

    Example:
        With wild cards and GCS path:
        ```
        zarr_paths = [
            "gs://bucket/2020_nonhrv_split_*.zarr",
            "gs://bucket/2019_nonhrv_split_*.zarr",
        ]
        ds = open_sat_data(zarr_paths)
        ```
        Without wild cards and with local path:
        ```
        zarr_paths = [
            "/data/2020_nonhrv.zarr",
            "/data/2019_nonhrv.zarr",
        ]
        ds = open_sat_data(zarr_paths)
        ```
    """
    if isinstance(zarr_path, (list, tuple)):
        # Open every path on its own, then join the pieces along the time dimension
        per_path_datasets = [_get_single_sat_data(single_path) for single_path in zarr_path]
        ds = xr.combine_nested(
            per_path_datasets,
            concat_dim="time",
            combine_attrs="override",
            join="override",
        )
    else:
        ds = _get_single_sat_data(zarr_path)

    # Switch to the standard dimension names
    new_names = {"variable": "channel", "time": "time_utc"}
    ds = ds.rename(new_names)

    # The timestamps must be unique and increasing
    check_time_unique_increasing(ds.time_utc)

    # Flip the spatial coords where needed so that both are in increasing order
    ds = make_spatial_coords_increasing(
        ds,
        x_coord="x_geostationary",
        y_coord="y_geostationary",
    )

    dim_order = ("time_utc", "channel", "x_geostationary", "y_geostationary")
    ds = ds.transpose(*dim_order)

    # TODO: should we control the dtype of the DataArray?
    return ds.data
@@ -0,0 +1,29 @@
1
+ import xarray as xr
2
+ import pandas as pd
3
+
4
def check_time_unique_increasing(datetimes) -> None:
    """Check that the given datetimes are unique and increasing.

    Args:
        datetimes: Datetime-like values accepted by `pd.DatetimeIndex`

    Raises:
        AssertionError: If any datetime is duplicated, or if the datetimes are not
            monotonically increasing
    """
    time = pd.DatetimeIndex(datetimes)

    # Raise explicitly rather than using `assert` so the checks still run under `python -O`
    if not time.is_unique:
        raise AssertionError("The datetimes contain duplicated values")
    if not time.is_monotonic_increasing:
        raise AssertionError("The datetimes are not monotonically increasing")
9
+
10
def make_spatial_coords_increasing(ds: xr.Dataset, x_coord: str, y_coord: str) -> xr.Dataset:
    """Make sure the spatial coordinates are in increasing order

    A coordinate whose first value is greater than its last value is reversed. Both
    coordinates are then checked to be strictly increasing.

    Args:
        ds: Xarray Dataset
        x_coord: Name of the x coordinate
        y_coord: Name of the y coordinate

    Returns:
        The dataset, reversed along whichever of the two coordinates needed reversing

    Raises:
        AssertionError: If either coordinate is still not strictly increasing after the
            possible reversal
    """
    # Make sure the coords are in increasing order
    if ds[x_coord][0] > ds[x_coord][-1]:
        ds = ds.isel({x_coord: slice(None, None, -1)})
    if ds[y_coord][0] > ds[y_coord][-1]:
        ds = ds.isel({y_coord: slice(None, None, -1)})

    # Check the coords are all increasing now. Raise explicitly rather than using `assert`
    # so the checks still run under `python -O`.
    if not (ds[x_coord].diff(dim=x_coord) > 0).all():
        raise AssertionError(f"The '{x_coord}' coordinate is not strictly increasing")
    if not (ds[y_coord].diff(dim=y_coord) > 0).all():
        raise AssertionError(f"The '{y_coord}' coordinate is not strictly increasing")

    return ds
@@ -13,7 +13,7 @@ def convert_nwp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> N
13
13
  NWPBatchKey.nwp: da.values,
14
14
  NWPBatchKey.nwp_channel_names: da.channel.values,
15
15
  NWPBatchKey.nwp_init_time_utc: da.init_time_utc.values.astype(float),
16
- NWPBatchKey.nwp_step: (da.step.values / pd.Timedelta("1H")).astype(int),
16
+ NWPBatchKey.nwp_step: (da.step.values / pd.Timedelta("1h")).astype(int),
17
17
  }
18
18
 
19
19
  if "target_time_utc" in da.coords:
@@ -124,8 +124,10 @@ def _get_idx_of_pixel_closest_to_poi_geostationary(
124
124
  center_geostationary = Location(x=x, y=y, coordinate_system="geostationary")
125
125
 
126
126
  # Check that the requested point lies within the data
127
- assert da[x_dim].min() < x < da[x_dim].max()
128
- assert da[y_dim].min() < y < da[y_dim].max()
127
+ assert da[x_dim].min() < x < da[x_dim].max(), \
128
+ f"{x} is not in the interval {da[x_dim].min().values}: {da[x_dim].max().values}"
129
+ assert da[y_dim].min() < y < da[y_dim].max(), \
130
+ f"{y} is not in the interval {da[y_dim].min().values}: {da[y_dim].max().values}"
129
131
 
130
132
  # Get the index into x and y nearest to x_center_geostationary and y_center_geostationary:
131
133
  x_index_at_center = searchsorted(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.8
3
+ Version: 0.0.10
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -1,22 +1,28 @@
1
1
  ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
2
2
  ocf_data_sampler/data/uk_gsp_locations.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
3
+ ocf_data_sampler/load/__init__.py,sha256=MjgfxilTzyz1RYFoBEeAXmE9hyjknLvdmlHPmlAoiQY,44
4
+ ocf_data_sampler/load/gsp.py,sha256=ewuypEah4dfj_gIcYwZoLHiA2ZjUj7KPeyBYKwT1qdo,761
5
+ ocf_data_sampler/load/satellite.py,sha256=RcF0HmpV2PKedOdqcTc6dDk4qdQZAdTLYwmMuNpSTZQ,2906
6
+ ocf_data_sampler/load/utils.py,sha256=tkhuhL3YzJucAtaCH572OxBkYvcEDpSed83yg02O8jg,966
7
+ ocf_data_sampler/load/nwp/__init__.py,sha256=SmcrnbygO5xtCKmGR4wtHrj-HI7nOAvnAtfuvRufBGQ,25
8
+ ocf_data_sampler/load/nwp/nwp.py,sha256=O4QnajEZem8BvBgTcYYDBhRhgqPYuJkolHmpMRmrXEA,610
3
9
  ocf_data_sampler/numpy_batch/__init__.py,sha256=mrtqwbGik5Zc9MYP5byfCTBm08wMtS2XnTsypC4fPMo,245
4
10
  ocf_data_sampler/numpy_batch/gsp.py,sha256=EL0_cJJNyvkQQcOat9vFA61pF4lema3BP_vB4ZS788U,805
5
- ocf_data_sampler/numpy_batch/nwp.py,sha256=AdEBPOjvEnyWJ8UAVqrBfSXPGMSl3sC1_Rfxxr0A1xM,1105
11
+ ocf_data_sampler/numpy_batch/nwp.py,sha256=Rv0yfDj902Z2oCwdlRjOs3Kh-F5Fgxjjylh99-lQ9ws,1105
6
12
  ocf_data_sampler/numpy_batch/satellite.py,sha256=e6eoNmiiHtzZbDVtBolFzDuE3qwhHN6bL9H86emAUsk,732
7
13
  ocf_data_sampler/numpy_batch/sun_position.py,sha256=UW6-WtjrKdCkcguolHUDSLhYFfarknQzzjlCX8YdEOM,1700
8
14
  ocf_data_sampler/select/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
9
15
  ocf_data_sampler/select/dropout.py,sha256=JYbjG5e8d48te7xj4I9pTWk43d6ksjGeyKFLSTuAOlY,1062
10
16
  ocf_data_sampler/select/fill_time_periods.py,sha256=iTtMjIPFYG5xtUYYedAFBLjTWWUa7t7WQ0-yksWf0-E,440
11
17
  ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDxrgG3zqzNjkBF_jlV9yye2ZYT2E,11925
12
- ocf_data_sampler/select/select_spatial_slice.py,sha256=ZKYrEVyUB83Amayc_cYkEw4g93Veone8d3DWrYJCZb8,11286
18
+ ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
13
19
  ocf_data_sampler/select/select_time_slice.py,sha256=XuksC9N03c5rV9OeWtxjGuoGyeJJGy4JMJe3w7m6oaw,6654
14
20
  ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
15
21
  ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=rVKFfoHqSfm4C-eOXiqi5GwBJdMewRMIikvpjEJXi1s,17477
16
22
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
23
  tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
18
- ocf_data_sampler-0.0.8.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
19
- ocf_data_sampler-0.0.8.dist-info/METADATA,sha256=kMfeb2SacYux-bEozcQPMpFRYNH9YEGu2luYR-Hx9JE,587
20
- ocf_data_sampler-0.0.8.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
21
- ocf_data_sampler-0.0.8.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
22
- ocf_data_sampler-0.0.8.dist-info/RECORD,,
24
+ ocf_data_sampler-0.0.10.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
25
+ ocf_data_sampler-0.0.10.dist-info/METADATA,sha256=eXXMZ7ogKXx0j3Krj508liDesXTjUzM2c0NlzOz1P6Q,588
26
+ ocf_data_sampler-0.0.10.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
27
+ ocf_data_sampler-0.0.10.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
28
+ ocf_data_sampler-0.0.10.dist-info/RECORD,,