ocf-data-sampler 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import numpy as np
4
4
  import pandas as pd
5
5
  import xarray as xr
6
6
  from torch.utils.data import Dataset
7
-
7
+ import pkg_resources
8
8
 
9
9
  from ocf_data_sampler.load.gsp import open_gsp
10
10
  from ocf_data_sampler.load.nwp import open_nwp
@@ -37,8 +37,6 @@ from ocf_datapipes.utils.geospatial import osgb_to_lon_lat
37
37
  from ocf_datapipes.utils.consts import (
38
38
  NWP_MEANS,
39
39
  NWP_STDS,
40
- RSS_MEAN,
41
- RSS_STD,
42
40
  )
43
41
 
44
42
  from ocf_datapipes.training.common import concat_xr_time_utc, normalize_gsp
@@ -69,11 +67,12 @@ def get_dataset_dict(config: Configuration) -> dict[xr.DataArray, dict[xr.DataAr
69
67
 
70
68
  datasets_dict = {}
71
69
 
72
- # We always assume GSP will be included
73
- da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
70
+ # Load GSP data unless the path is None
71
+ if in_config.gsp.gsp_zarr_path:
72
+ da_gsp = open_gsp(zarr_path=in_config.gsp.gsp_zarr_path)
74
73
 
75
- # Remove national GSP
76
- datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
74
+ # Remove national GSP
75
+ datasets_dict["gsp"] = da_gsp.sel(gsp_id=slice(1, None))
77
76
 
78
77
  # Load NWP data if in config
79
78
  if in_config.nwp:
@@ -172,19 +171,19 @@ def find_valid_t0_times(
172
171
 
173
172
  contiguous_time_periods['sat'] = time_periods
174
173
 
175
- # GSP always assumed to be in data
176
- gsp_config = config.input_data.gsp
174
+ if "gsp" in datasets_dict:
175
+ gsp_config = config.input_data.gsp
177
176
 
178
- time_periods = find_contiguous_t0_periods(
179
- pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
180
- sample_period_duration=minutes(gsp_config.time_resolution_minutes),
181
- history_duration=minutes(gsp_config.history_minutes),
182
- forecast_duration=minutes(gsp_config.forecast_minutes),
183
- )
177
+ time_periods = find_contiguous_t0_periods(
178
+ pd.DatetimeIndex(datasets_dict["gsp"]["time_utc"]),
179
+ sample_period_duration=minutes(gsp_config.time_resolution_minutes),
180
+ history_duration=minutes(gsp_config.history_minutes),
181
+ forecast_duration=minutes(gsp_config.forecast_minutes),
182
+ )
184
183
 
185
- contiguous_time_periods['gsp'] = time_periods
184
+ contiguous_time_periods['gsp'] = time_periods
186
185
 
187
- # just get the values (no the keys)
186
+ # just get the values (not the keys)
188
187
  contiguous_time_periods_values = list(contiguous_time_periods.values())
189
188
 
190
189
  # Find joint overlapping contiguous time periods
@@ -248,8 +247,8 @@ def slice_datasets_by_space(
248
247
  width_pixels=sat_config.satellite_image_size_pixels_width,
249
248
  )
250
249
 
251
- # GSP always assumed to be in data
252
- sliced_datasets_dict["gsp"] = datasets_dict["gsp"].sel(gsp_id=location.id)
250
+ if "gsp" in datasets_dict:
251
+ sliced_datasets_dict["gsp"] = datasets_dict["gsp"].sel(gsp_id=location.id)
253
252
 
254
253
  return sliced_datasets_dict
255
254
 
@@ -314,33 +313,33 @@ def slice_datasets_by_time(
314
313
  sat_dropout_time,
315
314
  )
316
315
 
317
- # GSP always assumed to be included
318
- gsp_config = config.input_data.gsp
316
+ if "gsp" in datasets_dict:
317
+ gsp_config = config.input_data.gsp
319
318
 
320
- sliced_datasets_dict["gsp_future"] = select_time_slice(
321
- datasets_dict["gsp"],
322
- t0,
323
- sample_period_duration=minutes(gsp_config.time_resolution_minutes),
324
- interval_start=minutes(30),
325
- interval_end=minutes(gsp_config.forecast_minutes),
326
- )
327
-
328
- sliced_datasets_dict["gsp"] = select_time_slice(
329
- datasets_dict["gsp"],
330
- t0,
331
- sample_period_duration=minutes(gsp_config.time_resolution_minutes),
332
- interval_start=-minutes(gsp_config.history_minutes),
333
- interval_end=minutes(0),
334
- )
319
+ sliced_datasets_dict["gsp_future"] = select_time_slice(
320
+ datasets_dict["gsp"],
321
+ t0,
322
+ sample_period_duration=minutes(gsp_config.time_resolution_minutes),
323
+ interval_start=minutes(30),
324
+ interval_end=minutes(gsp_config.forecast_minutes),
325
+ )
326
+
327
+ sliced_datasets_dict["gsp"] = select_time_slice(
328
+ datasets_dict["gsp"],
329
+ t0,
330
+ sample_period_duration=minutes(gsp_config.time_resolution_minutes),
331
+ interval_start=-minutes(gsp_config.history_minutes),
332
+ interval_end=minutes(0),
333
+ )
335
334
 
336
- # Dropout on the GSP, but not the future GSP
337
- gsp_dropout_time = draw_dropout_time(
338
- t0,
339
- dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
340
- dropout_frac=gsp_config.dropout_fraction,
341
- )
335
+ # Dropout on the GSP, but not the future GSP
336
+ gsp_dropout_time = draw_dropout_time(
337
+ t0,
338
+ dropout_timedeltas=minutes(gsp_config.dropout_timedeltas_minutes),
339
+ dropout_frac=gsp_config.dropout_fraction,
340
+ )
342
341
 
343
- sliced_datasets_dict["gsp"] = apply_dropout_time(sliced_datasets_dict["gsp"], gsp_dropout_time)
342
+ sliced_datasets_dict["gsp"] = apply_dropout_time(sliced_datasets_dict["gsp"], gsp_dropout_time)
344
343
 
345
344
  return sliced_datasets_dict
346
345
 
@@ -379,23 +378,23 @@ def process_and_combine_datasets(
379
378
  numpy_modalities.append({BatchKey.nwp: nwp_numpy_modalities})
380
379
 
381
380
  if "sat" in dataset_dict:
382
- # Standardise
383
- # TODO: Since satellite is in range 0-1 already, so we don't need to standardize
384
- da_sat = (dataset_dict["sat"] - RSS_MEAN) / RSS_STD
381
+ # Satellite is already in the range [0-1] so no need to standardise
382
+ da_sat = dataset_dict["sat"]
383
+
385
384
  # Convert to NumpyBatch
386
385
  numpy_modalities.append(convert_satellite_to_numpy_batch(da_sat))
387
386
 
388
- # GSP always assumed to be in data
389
387
  gsp_config = config.input_data.gsp
390
- da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
391
- da_gsp = normalize_gsp(da_gsp)
392
-
393
- numpy_modalities.append(
394
- convert_gsp_to_numpy_batch(
395
- da_gsp,
396
- t0_idx=gsp_config.history_minutes / gsp_config.time_resolution_minutes
388
+ if "gsp" in dataset_dict:
389
+ da_gsp = concat_xr_time_utc([dataset_dict["gsp"], dataset_dict["gsp_future"]])
390
+ da_gsp = normalize_gsp(da_gsp)
391
+
392
+ numpy_modalities.append(
393
+ convert_gsp_to_numpy_batch(
394
+ da_gsp,
395
+ t0_idx=gsp_config.history_minutes / gsp_config.time_resolution_minutes
396
+ )
397
397
  )
398
- )
399
398
 
400
399
  # Make sun coords NumpyBatch
401
400
  datetimes = pd.date_range(
@@ -440,6 +439,29 @@ def get_locations(ga_gsp: xr.DataArray) -> list[Location]:
440
439
  return locations
441
440
 
442
441
 
442
+ def get_gsp_locations() -> list[Location]:
443
+ """Get list of locations of all GSPs"""
444
+ locations = []
445
+
446
+ # Load UK GSP locations
447
+ df_gsp_loc = pd.read_csv(
448
+ pkg_resources.resource_filename(__name__, "../data/uk_gsp_locations.csv"),
449
+ index_col="gsp_id",
450
+ )
451
+
452
+ for gsp_id in np.arange(1, 318):
453
+ locations.append(
454
+ Location(
455
+ coordinate_system = "osgb",
456
+ x=df_gsp_loc.loc[gsp_id].x_osgb,
457
+ y=df_gsp_loc.loc[gsp_id].y_osgb,
458
+ id=gsp_id,
459
+ )
460
+ )
461
+ return locations
462
+
463
+
464
+
443
465
  class PVNetUKRegionalDataset(Dataset):
444
466
  def __init__(
445
467
  self,
@@ -470,7 +492,7 @@ class PVNetUKRegionalDataset(Dataset):
470
492
  valid_t0_times = valid_t0_times[valid_t0_times<=pd.Timestamp(end_time)]
471
493
 
472
494
  # Construct list of locations to sample from
473
- locations = get_locations(datasets_dict["gsp"])
495
+ locations = get_gsp_locations()
474
496
 
475
497
  # Construct a lookup for locations - useful for users to construct sample by GSP ID
476
498
  location_lookup = {loc.id: loc for loc in locations}
@@ -540,6 +562,5 @@ class PVNetUKRegionalDataset(Dataset):
540
562
  assert gsp_id in self.location_lookup
541
563
 
542
564
  location = self.location_lookup[gsp_id]
543
-
544
565
 
545
- return self._get_sample(t0, location)
566
+ return self._get_sample(t0, location)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.13
3
+ Version: 0.0.14
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -22,11 +22,11 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=6ioB8LeFpFNBMgKDx
22
22
  ocf_data_sampler/select/select_spatial_slice.py,sha256=7BSzOFPMSBWpBWXSajWTfI8luUVsSgh4zN-rkr-AuUs,11470
23
23
  ocf_data_sampler/select/select_time_slice.py,sha256=XuksC9N03c5rV9OeWtxjGuoGyeJJGy4JMJe3w7m6oaw,6654
24
24
  ocf_data_sampler/torch_datasets/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
25
- ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=1jo-5KhGhFv6mb5C9HHTn_fiTHgaFgnuifA_cLt4JYs,17823
25
+ ocf_data_sampler/torch_datasets/pvnet_uk_regional.py,sha256=haljV4FAZI4-Qf-65nq-JIJOIQNhR6YRncjTBWMYkY4,18502
26
26
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  tests/conftest.py,sha256=OcArgF60paroZQqoP7xExRBF34nEyMuXd7dS7hD6p3w,5393
28
- ocf_data_sampler-0.0.13.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
29
- ocf_data_sampler-0.0.13.dist-info/METADATA,sha256=8G8qD019wgJTz9M2594c5Zm19aIDWxqvl1smiTgEJT4,588
30
- ocf_data_sampler-0.0.13.dist-info/WHEEL,sha256=Mdi9PDNwEZptOjTlUcAth7XJDFtKrHYaQMPulZeBCiQ,91
31
- ocf_data_sampler-0.0.13.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
32
- ocf_data_sampler-0.0.13.dist-info/RECORD,,
28
+ ocf_data_sampler-0.0.14.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
29
+ ocf_data_sampler-0.0.14.dist-info/METADATA,sha256=3aN9lKWnmbNdjsF-J69AKAPwvc2WwUDkb0Nnyorr92c,588
30
+ ocf_data_sampler-0.0.14.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
31
+ ocf_data_sampler-0.0.14.dist-info/top_level.txt,sha256=KaQn5qzkJGJP6hKWqsVAc9t0cMLjVvSTk8-kTrW79SA,23
32
+ ocf_data_sampler-0.0.14.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (73.0.1)
2
+ Generator: setuptools (74.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5