ocf-data-sampler 0.0.17__tar.gz → 0.0.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ocf-data-sampler might be problematic. Click here for more details.
- {ocf_data_sampler-0.0.17/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.19}/PKG-INFO +1 -1
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/gsp.py +0 -3
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/torch_datasets/pvnet_uk_regional.py +40 -22
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/setup.py +1 -1
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/LICENSE +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/MANIFEST.in +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/README.md +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/gsp.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/nwp.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/satellite.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/utils.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/nwp.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/satellite.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/sun_position.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/dropout.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/fill_time_periods.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/select_time_slice.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/torch_datasets/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler.egg-info/requires.txt +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler.egg-info/top_level.txt +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/requirements.txt +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/setup.cfg +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/tests/__init__.py +0 -0
- {ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/tests/conftest.py +0 -0
|
@@ -9,12 +9,9 @@ def convert_gsp_to_numpy_batch(da: xr.DataArray, t0_idx: int | None = None) -> N
|
|
|
9
9
|
|
|
10
10
|
example: NumpyBatch = {
|
|
11
11
|
BatchKey.gsp: da.values,
|
|
12
|
-
BatchKey.gsp_id: da.gsp_id.values,
|
|
13
12
|
BatchKey.gsp_nominal_capacity_mwp: da.isel(time_utc=0)["nominal_capacity_mwp"].values,
|
|
14
13
|
BatchKey.gsp_effective_capacity_mwp: da.isel(time_utc=0)["effective_capacity_mwp"].values,
|
|
15
14
|
BatchKey.gsp_time_utc: da["time_utc"].values.astype(float),
|
|
16
|
-
BatchKey.gsp_x_osgb: da.x_osgb.item(),
|
|
17
|
-
BatchKey.gsp_y_osgb: da.y_osgb.item(),
|
|
18
15
|
}
|
|
19
16
|
|
|
20
17
|
if t0_idx is not None:
|
|
@@ -69,7 +69,7 @@ def get_dataset_dict(config: Configuration) -> dict[xr.DataArray, dict[xr.DataAr
|
|
|
69
69
|
|
|
70
70
|
# Load GSP data unless the path is None
|
|
71
71
|
if in_config.gsp.gsp_zarr_path:
|
|
72
|
-
da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
|
|
72
|
+
da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path).compute()
|
|
73
73
|
|
|
74
74
|
# Remove national GSP
|
|
75
75
|
datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
|
|
@@ -344,6 +344,24 @@ def slice_datasets_by_time(
|
|
|
344
344
|
return sliced_datasets_dict
|
|
345
345
|
|
|
346
346
|
|
|
347
|
+
def fill_nans_in_arrays(batch: NumpyBatch) -> NumpyBatch:
|
|
348
|
+
"""Fills all NaN values in each np.ndarray in the batch dictionary with zeros.
|
|
349
|
+
|
|
350
|
+
Operation is performed in-place on the batch.
|
|
351
|
+
"""
|
|
352
|
+
for k, v in batch.items():
|
|
353
|
+
if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
|
|
354
|
+
if np.isnan(v).any():
|
|
355
|
+
batch[k] = np.nan_to_num(v, copy=False, nan=0.0)
|
|
356
|
+
|
|
357
|
+
# Recursion is included to reach NWP arrays in subdict
|
|
358
|
+
elif isinstance(v, dict):
|
|
359
|
+
fill_nans_in_arrays(v)
|
|
360
|
+
|
|
361
|
+
return batch
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
|
|
347
365
|
def merge_dicts(list_of_dicts: list[dict]) -> dict:
|
|
348
366
|
"""Merge a list of dictionaries into a single dictionary"""
|
|
349
367
|
# TODO: This doesn't account for duplicate keys, which will be overwritten
|
|
@@ -385,6 +403,7 @@ def process_and_combine_datasets(
|
|
|
385
403
|
numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
|
|
386
404
|
|
|
387
405
|
gsp_config = config.input_data.gsp
|
|
406
|
+
|
|
388
407
|
if "gsp" in dataset_dict:
|
|
389
408
|
da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
|
|
390
409
|
da_gsp = normalize_gsp(da_gsp)
|
|
@@ -406,9 +425,18 @@ def process_and_combine_datasets(
|
|
|
406
425
|
lon, lat = osgb_to_lon_lat(location.x, location.y)
|
|
407
426
|
|
|
408
427
|
numpy_modalities.append(make_sun_position_numpy_batch(datetimes, lon, lat))
|
|
409
|
-
|
|
410
|
-
#
|
|
428
|
+
|
|
429
|
+
# Add coordinate data
|
|
430
|
+
# TODO: Do we need all of these?
|
|
431
|
+
numpy_modalities.append({
|
|
432
|
+
BatchKey.gsp_id: location.id,
|
|
433
|
+
BatchKey.gsp_x_osgb: location.x,
|
|
434
|
+
BatchKey.gsp_y_osgb: location.y,
|
|
435
|
+
})
|
|
436
|
+
|
|
437
|
+
# Combine all the modalities and fill NaNs
|
|
411
438
|
combined_sample = merge_dicts(numpy_modalities)
|
|
439
|
+
combined_sample = fill_nans_in_arrays(combined_sample)
|
|
412
440
|
|
|
413
441
|
return combined_sample
|
|
414
442
|
|
|
@@ -423,24 +451,12 @@ def compute(xarray_dict: dict) -> dict:
|
|
|
423
451
|
return xarray_dict
|
|
424
452
|
|
|
425
453
|
|
|
426
|
-
def
|
|
427
|
-
"""Get list of locations of GSP"""
|
|
428
|
-
locations = []
|
|
429
|
-
for gsp_id in ga_gsp.gsp_id.values:
|
|
430
|
-
da = ga_gsp.sel(gsp_id=gsp_id)
|
|
431
|
-
locations.append(
|
|
432
|
-
Location(
|
|
433
|
-
coordinate_system = "osgb",
|
|
434
|
-
x=da.x_osgb.item(),
|
|
435
|
-
y=da.y_osgb.item(),
|
|
436
|
-
id=gsp_id,
|
|
437
|
-
)
|
|
438
|
-
)
|
|
439
|
-
return locations
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
def get_gsp_locations() -> list[Location]:
|
|
454
|
+
def get_gsp_locations(gsp_ids: list[int] | None = None) -> list[Location]:
|
|
443
455
|
"""Get list of locations of all GSPs"""
|
|
456
|
+
|
|
457
|
+
if gsp_ids is None:
|
|
458
|
+
gsp_ids = [i for i in range(1, 318)]
|
|
459
|
+
|
|
444
460
|
locations = []
|
|
445
461
|
|
|
446
462
|
# Load UK GSP locations
|
|
@@ -449,7 +465,7 @@ def get_gsp_locations() -> list[Location]:
|
|
|
449
465
|
index_col="gsp_id",
|
|
450
466
|
)
|
|
451
467
|
|
|
452
|
-
for gsp_id in
|
|
468
|
+
for gsp_id in gsp_ids:
|
|
453
469
|
locations.append(
|
|
454
470
|
Location(
|
|
455
471
|
coordinate_system = "osgb",
|
|
@@ -468,6 +484,7 @@ class PVNetUKRegionalDataset(Dataset):
|
|
|
468
484
|
config_filename: str,
|
|
469
485
|
start_time: str | None = None,
|
|
470
486
|
end_time: str| None = None,
|
|
487
|
+
gsp_ids: list[int] | None = None,
|
|
471
488
|
):
|
|
472
489
|
"""A torch Dataset for creating PVNet UK GSP samples
|
|
473
490
|
|
|
@@ -475,6 +492,7 @@ class PVNetUKRegionalDataset(Dataset):
|
|
|
475
492
|
config_filename: Path to the configuration file
|
|
476
493
|
start_time: Limit the init-times to be after this
|
|
477
494
|
end_time: Limit the init-times to be before this
|
|
495
|
+
gsp_ids: List of GSP IDs to create samples for. Defaults to all
|
|
478
496
|
"""
|
|
479
497
|
|
|
480
498
|
config = load_yaml_configuration(config_filename)
|
|
@@ -492,7 +510,7 @@ class PVNetUKRegionalDataset(Dataset):
|
|
|
492
510
|
valid_t0_times = valid_t0_times[valid_t0_times<=pd.Timestamp(end_time)]
|
|
493
511
|
|
|
494
512
|
# Construct list of locations to sample from
|
|
495
|
-
locations = get_gsp_locations()
|
|
513
|
+
locations = get_gsp_locations(gsp_ids)
|
|
496
514
|
|
|
497
515
|
# Construct a lookup for locations - useful for users to construct sample by GSP ID
|
|
498
516
|
location_lookup = {loc.id: loc for loc in locations}
|
|
@@ -10,7 +10,7 @@ install_requires = (this_directory / "requirements.txt").read_text().splitlines(
|
|
|
10
10
|
|
|
11
11
|
setup(
|
|
12
12
|
name="ocf_data_sampler",
|
|
13
|
-
version="0.0.17",
|
|
13
|
+
version="0.0.19",
|
|
14
14
|
license="MIT",
|
|
15
15
|
description="Sample from weather data for renewable energy prediction",
|
|
16
16
|
author="James Fulton, Peter Dudfield, and the Open Climate Fix team",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/data/uk_gsp_locations.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/__init__.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/ecmwf.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/ukv.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/load/nwp/providers/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/satellite.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/numpy_batch/sun_position.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/fill_time_periods.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/select_spatial_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/select/select_time_slice.py
RENAMED
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler/torch_datasets/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{ocf_data_sampler-0.0.17 → ocf_data_sampler-0.0.19}/ocf_data_sampler.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|