ocf-data-sampler 0.0.12__tar.gz → 0.0.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- {ocf_data_sampler-0.0.12/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.14}/PKG-INFO +1 -1
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +80 -59
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/setup.py +1 -1
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/LICENSE +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/MANIFEST.in +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/README.md +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/gsp.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/gsp.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/nwp.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/satellite.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/dropout.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/requirements.txt +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/setup.cfg +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/tests/__init__.py +0 -0
- {ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/tests/conftest.py +0 -0
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import xarray as xr
|
|
6
6
|
from torch.utils.data import Dataset
|
|
7
|
-
|
|
7
|
+
import pkg_resources
|
|
8
8
|
|
|
9
9
|
from ocf_data_sampler.load.gsp import open_gsp
|
|
10
10
|
from ocf_data_sampler.load.nwp import open_nwp
|
|
@@ -37,8 +37,6 @@ from ocf_datapipes.utils.geospatial import osgb_to_lon_lat
|
|
|
37
37
|
from ocf_datapipes.utils.consts import (
|
|
38
38
|
NWP_MEANS,
|
|
39
39
|
NWP_STDS,
|
|
40
|
-
RSS_MEAN,
|
|
41
|
-
RSS_STD,
|
|
42
40
|
)
|
|
43
41
|
|
|
44
42
|
from ocf_datapipes.training.common import concat_xr_time_utc, normalize_gsp
|
|
@@ -69,11 +67,12 @@ def get_dataset_dict(config: Configuration) -> dict[xr.DataArray, dict[xr.DataAr
|
|
|
69
67
|
|
|
70
68
|
datasets_dict = {}
|
|
71
69
|
|
|
72
|
-
#
|
|
73
|
-
|
|
70
|
+
# Load GSP data unless the path is None
|
|
71
|
+
if in_config.gsp.gsp_zarr_path:
|
|
72
|
+
da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
|
|
74
73
|
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
# Remove national GSP
|
|
75
|
+
datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
|
|
77
76
|
|
|
78
77
|
# Load NWP data if in config
|
|
79
78
|
if in_config.nwp:
|
|
@@ -172,19 +171,19 @@ def find_valid_t0_times(
|
|
|
172
171
|
|
|
173
172
|
contiguous_time_periods['sat'] = time_periods
|
|
174
173
|
|
|
175
|
-
|
|
176
|
-
|
|
174
|
+
if "gsp" in datasets_dict:
|
|
175
|
+
gsp_config = config.input_data.gsp
|
|
177
176
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
177
|
+
time_periods = find_contiguous_t0_periods(
|
|
178
|
+
pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
|
|
179
|
+
sample_period_duration=minutes(gsp_config.time_resolution_minutes),
|
|
180
|
+
history_duration=minutes(gsp_config.history_minutes),
|
|
181
|
+
forecast_duration=minutes(gsp_config.forecast_minutes),
|
|
182
|
+
)
|
|
184
183
|
|
|
185
|
-
|
|
184
|
+
contiguous_time_periods['gsp'] = time_periods
|
|
186
185
|
|
|
187
|
-
# just get the values (
|
|
186
|
+
# just get the values (not the keys)
|
|
188
187
|
contiguous_time_periods_values = list(contiguous_time_periods.values())
|
|
189
188
|
|
|
190
189
|
# Find joint overlapping contiguous time periods
|
|
@@ -196,7 +195,7 @@ def find_valid_t0_times(
|
|
|
196
195
|
valid_time_periods = contiguous_time_periods_values[0]
|
|
197
196
|
|
|
198
197
|
# check there are some valid time periods
|
|
199
|
-
if len(valid_time_periods
|
|
198
|
+
if len(valid_time_periods) == 0:
|
|
200
199
|
raise ValueError(f"No valid time periods found, {contiguous_time_periods=}")
|
|
201
200
|
|
|
202
201
|
# Fill out the contiguous time periods to get the t0 times
|
|
@@ -248,8 +247,8 @@ def slice_datasets_by_space(
|
|
|
248
247
|
width_pixels=sat_config.satellite_image_size_pixels_width,
|
|
249
248
|
)
|
|
250
249
|
|
|
251
|
-
|
|
252
|
-
|
|
250
|
+
if "gsp" in datasets_dict:
|
|
251
|
+
sliced_datasets_dict["gsp"] = datasets_dict["gsp"].sel(gsp_id=location.id)
|
|
253
252
|
|
|
254
253
|
return sliced_datasets_dict
|
|
255
254
|
|
|
@@ -314,33 +313,33 @@ def slice_datasets_by_time(
|
|
|
314
313
|
sat_dropout_time,
|
|
315
314
|
)
|
|
316
315
|
|
|
317
|
-
|
|
318
|
-
|
|
316
|
+
if "gsp" in datasets_dict:
|
|
317
|
+
gsp_config = config.input_data.gsp
|
|
319
318
|
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
319
|
+
sliced_datasets_dict["gsp_future"] = select_time_slice(
|
|
320
|
+
datasets_dict["gsp"],
|
|
321
|
+
t0,
|
|
322
|
+
sample_period_duration=minutes(gsp_config.time_resolution_minutes),
|
|
323
|
+
interval_start=minutes(30),
|
|
324
|
+
interval_end=minutes(gsp_config.forecast_minutes),
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
sliced_datasets_dict["gsp"] = select_time_slice(
|
|
328
|
+
datasets_dict["gsp"],
|
|
329
|
+
t0,
|
|
330
|
+
sample_period_duration=minutes(gsp_config.time_resolution_minutes),
|
|
331
|
+
interval_start=-minutes(gsp_config.history_minutes),
|
|
332
|
+
interval_end=minutes(0),
|
|
333
|
+
)
|
|
335
334
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
335
|
+
# Dropout on the GSP, but not the future GSP
|
|
336
|
+
gsp_dropout_time = draw_dropout_time(
|
|
337
|
+
t0,
|
|
338
|
+
dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
|
|
339
|
+
dropout_frac=gsp_config.dropout_fraction,
|
|
340
|
+
)
|
|
342
341
|
|
|
343
|
-
|
|
342
|
+
sliced_datasets_dict["gsp"] = apply_dropout_time(sliced_datasets_dict["gsp"], gsp_dropout_time)
|
|
344
343
|
|
|
345
344
|
return sliced_datasets_dict
|
|
346
345
|
|
|
@@ -379,23 +378,23 @@ def process_and_combine_datasets(
|
|
|
379
378
|
numpy_modalities.append({BatchKey.nwp: nwp_numpy_modalities})
|
|
380
379
|
|
|
381
380
|
if "sat" in dataset_dict:
|
|
382
|
-
#
|
|
383
|
-
|
|
384
|
-
|
|
381
|
+
# Satellite is already in the range [0-1] so no need to standardise
|
|
382
|
+
da_sat = dataset_dict["sat"]
|
|
383
|
+
|
|
385
384
|
# Convert to NumpyBatch
|
|
386
385
|
numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
|
|
387
386
|
|
|
388
|
-
# GSP always assumed to be in data
|
|
389
387
|
gsp_config = config.input_data.gsp
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
388
|
+
if "gsp" in dataset_dict:
|
|
389
|
+
da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
|
|
390
|
+
da_gsp = normalize_gsp(da_gsp)
|
|
391
|
+
|
|
392
|
+
numpy_modalities.append(
|
|
393
|
+
convert_gsp_to_numpy_batch(
|
|
394
|
+
da_gsp,
|
|
395
|
+
t0_idx=gsp_config.history_minutes / gsp_config.time_resolution_minutes
|
|
396
|
+
)
|
|
397
397
|
)
|
|
398
|
-
)
|
|
399
398
|
|
|
400
399
|
# Make sun coords NumpyBatch
|
|
401
400
|
datetimes = pd.date_range(
|
|
@@ -440,6 +439,29 @@ def get_locations(ga_gsp: xr.DataArray) -> list[Location]:
|
|
|
440
439
|
return locations
|
|
441
440
|
|
|
442
441
|
|
|
442
|
+
def get_gsp_locations() -> list[Location]:
|
|
443
|
+
"""Get list of locations of all GSPs"""
|
|
444
|
+
locations = []
|
|
445
|
+
|
|
446
|
+
# Load UK GSP locations
|
|
447
|
+
df_gsp_loc = pd.read_csv(
|
|
448
|
+
pkg_resources.resource_filename(__name__, "../data/uk_gsp_locations.csv"),
|
|
449
|
+
index_col="gsp_id",
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
for gsp_id in np.arange(1, 318):
|
|
453
|
+
locations.append(
|
|
454
|
+
Location(
|
|
455
|
+
coordinate_system = "osgb",
|
|
456
|
+
x=df_gsp_loc.loc[gsp_id].x_osgb,
|
|
457
|
+
y=df_gsp_loc.loc[gsp_id].y_osgb,
|
|
458
|
+
id=gsp_id,
|
|
459
|
+
)
|
|
460
|
+
)
|
|
461
|
+
return locations
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
|
|
443
465
|
class PVNetUKRegionalDataset(Dataset):
|
|
444
466
|
def __init__(
|
|
445
467
|
self,
|
|
@@ -470,7 +492,7 @@ class PVNetUKRegionalDataset(Dataset):
|
|
|
470
492
|
valid_t0_times = valid_t0_times[valid_t0_times<=pd.Timestamp(end_time)]
|
|
471
493
|
|
|
472
494
|
# Construct list of locations to sample from
|
|
473
|
-
locations =
|
|
495
|
+
locations = get_gsp_locations()
|
|
474
496
|
|
|
475
497
|
# Construct a lookup for locations - useful for users to construct sample by GSP ID
|
|
476
498
|
location_lookup = {loc.id: loc for loc in locations}
|
|
@@ -540,6 +562,5 @@ class PVNetUKRegionalDataset(Dataset):
|
|
|
540
562
|
assert gsp_id in self.location_lookup
|
|
541
563
|
|
|
542
564
|
location = self.location_lookup[gsp_id]
|
|
543
|
-
|
|
544
565
|
|
|
545
|
-
return self._get_sample(t0, location)
|
|
566
|
+
return self._get_sample(t0, location)
|
|
@@ -10,7 +10,7 @@ install_requires = (this_directory / "requirements.txt").read_text().splitlines(
|
|
|
10
10
|
|
|
11
11
|
setup(
|
|
12
12
|
name="ocf_data_sampler",
|
|
13
|
-
version="0.0.12",
|
|
13
|
+
version="0.0.14",
|
|
14
14
|
license="MIT",
|
|
15
15
|
description="Sample from weather data for renewable energy prediction",
|
|
16
16
|
author="James Fulton, Peter Dudfield, and the Open Climate Fix team",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/data/uk_gsp_locations.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/__init__.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/ecmwf.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/ukv.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/load/nwp/providers/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/satellite.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/numpy_batch/sun_position.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/fill_time_periods.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/select_spatial_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/select/select_time_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler/torch_datasets/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.12 → ocf_data_sampler-0.0.14}/ocf_data_sampler.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|