ocf-data-sampler 0.0.46__py3-none-any.whl → 0.0.48__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -13,7 +13,7 @@ class SiteSampleKey:
13
13
  solar_elevation = "site_solar_elevation"
14
14
  id = "site_id"
15
15
 
16
-
16
+ # TODO update to include trig datetime + solar coords
17
17
  def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
18
18
  """Convert from Xarray to NumpySample"""
19
19
 
@@ -22,6 +22,7 @@ def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) ->
22
22
  SiteSampleKey.generation: da.values,
23
23
  SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
24
24
  SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
25
+ SiteSampleKey.id: da["site_id"].values,
25
26
  }
26
27
 
27
28
  if t0_idx is not None:
@@ -3,7 +3,6 @@ import pandas as pd
3
3
  import numpy as np
4
4
 
5
5
 
6
-
7
6
  def _sel_fillnan(
8
7
  da: xr.DataArray,
9
8
  start_dt: pd.Timestamp,
@@ -25,17 +24,6 @@ def _sel_default(
25
24
  return da.sel(time_utc=slice(start_dt, end_dt))
26
25
 
27
26
 
28
- # TODO either implement this or remove it, which would tidy up the code
29
- def _sel_fillinterp(
30
- da: xr.DataArray,
31
- start_dt: pd.Timestamp,
32
- end_dt: pd.Timestamp,
33
- sample_period_duration: pd.Timedelta,
34
- ) -> xr.DataArray:
35
- """Select a time slice from a DataArray, filling missing times with linear interpolation."""
36
- return NotImplemented
37
-
38
-
39
27
  def select_time_slice(
40
28
  ds: xr.DataArray,
41
29
  t0: pd.Timestamp,
@@ -43,17 +31,10 @@ def select_time_slice(
43
31
  interval_end: pd.Timedelta,
44
32
  sample_period_duration: pd.Timedelta,
45
33
  fill_selection: bool = False,
46
- max_steps_gap: int = 0,
47
34
  ):
48
35
  """Select a time slice from a Dataset or DataArray."""
49
- assert max_steps_gap >= 0, "max_steps_gap must be >= 0 "
50
36
 
51
- if fill_selection and max_steps_gap == 0:
52
- _sel = _sel_fillnan
53
- elif fill_selection and max_steps_gap > 0:
54
- _sel = _sel_fillinterp
55
- else:
56
- _sel = _sel_default
37
+ _sel = _sel_fillnan if fill_selection else _sel_default
57
38
 
58
39
  t0_datetime_utc = pd.Timestamp(t0)
59
40
  start_dt = t0_datetime_utc + interval_start
@@ -51,7 +51,6 @@ def slice_datasets_by_time(
51
51
  sample_period_duration=minutes(sat_config.time_resolution_minutes),
52
52
  interval_start=minutes(sat_config.interval_start_minutes),
53
53
  interval_end=minutes(sat_config.interval_end_minutes),
54
- max_steps_gap=2,
55
54
  )
56
55
 
57
56
  # Randomly sample dropout
@@ -122,4 +121,4 @@ def slice_datasets_by_time(
122
121
  site_dropout_time,
123
122
  )
124
123
 
125
- return sliced_datasets_dict
124
+ return sliced_datasets_dict
@@ -257,6 +257,8 @@ class SitesDataset(Dataset):
257
257
  )
258
258
  combined_sample_dataset = xr.merge([combined_sample_dataset, sun_position_features_xr])
259
259
 
260
+ # TODO include t0_index in xr dataset?
261
+
260
262
  # Fill any nan values
261
263
  return combined_sample_dataset.fillna(0.0)
262
264
 
@@ -317,6 +319,26 @@ class SitesDataset(Dataset):
317
319
 
318
320
  # ----- functions to load presaved samples ------
319
321
 
322
+ def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
323
+ """Convert a netcdf dataset to a numpy sample"""
324
+
325
+ # convert the single dataset to a dict of arrays
326
+ sample_dict = convert_from_dataset_to_dict_datasets(ds)
327
+
328
+ if "satellite" in sample_dict:
329
+ # rename satellite to satellite actual # TODO this could be improved
330
+ sample_dict["sat"] = sample_dict.pop("satellite")
331
+
332
+ # process and combine the datasets
333
+ sample = convert_to_numpy_and_combine(
334
+ dataset_dict=sample_dict,
335
+ )
336
+
337
+ # TODO think about normalization: maybe it's not done in sample creation, maybe it's done afterwards,
338
+ # to allow it to be flexible
339
+
340
+ return sample
341
+
320
342
  def convert_from_dataset_to_dict_datasets(combined_dataset: xr.Dataset) -> dict[str, xr.DataArray]:
321
343
  """
322
344
  Convert a combined sample dataset to a dict of datasets for each input
@@ -360,26 +382,6 @@ def nest_nwp_source_dict(d: dict, sep: str = "/") -> dict:
360
382
  new_dict["nwp"] = nwp_subdict
361
383
  return new_dict
362
384
 
363
- def convert_netcdf_to_numpy_sample(ds: xr.Dataset) -> dict:
364
- """Convert a netcdf dataset to a numpy sample"""
365
-
366
- # convert the single dataset to a dict of arrays
367
- sample_dict = convert_from_dataset_to_dict_datasets(ds)
368
-
369
- if "satellite" in sample_dict:
370
- # rename satellite to satellite actual # TODO this could be improved
371
- sample_dict["sat"] = sample_dict.pop("satellite")
372
-
373
- # process and combine the datasets
374
- sample = convert_to_numpy_and_combine(
375
- dataset_dict=sample_dict,
376
- )
377
-
378
- # TODO think about normalization: maybe it's not done in sample creation, maybe it's done afterwards,
379
- # to allow it to be flexible
380
-
381
- return sample
382
-
383
385
  def convert_to_numpy_and_combine(
384
386
  dataset_dict: dict,
385
387
  ) -> dict:
@@ -406,7 +408,6 @@ def convert_to_numpy_and_combine(
406
408
 
407
409
  if "site" in dataset_dict:
408
410
  da_sites = dataset_dict["site"]
409
- sites_sample = convert_site_to_numpy_sample(da_sites)
410
411
 
411
412
  numpy_modalities.append(
412
413
  convert_site_to_numpy_sample(
@@ -414,8 +415,6 @@ def convert_to_numpy_and_combine(
414
415
  )
415
416
  )
416
417
 
417
- numpy_modalities.append(sites_sample)
418
-
419
418
  # Combine all the modalities and fill NaNs
420
419
  combined_sample = merge_dicts(numpy_modalities)
421
420
  combined_sample = fill_nans_in_arrays(combined_sample)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.46
3
+ Version: 0.0.48
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -24,7 +24,7 @@ ocf_data_sampler/numpy_sample/datetime_features.py,sha256=U-9uRplfZ7VYFA4qBduI8O
24
24
  ocf_data_sampler/numpy_sample/gsp.py,sha256=5UaWO_aGRRVQo82wnDaT4zBKHihOnIsXiwgPjM8vGFM,1005
25
25
  ocf_data_sampler/numpy_sample/nwp.py,sha256=_seQNWsut3IzPsrpipqImjnaM3XNHZCy5_5be6syivk,1297
26
26
  ocf_data_sampler/numpy_sample/satellite.py,sha256=8OaTvkPjzSjotcdKsa6BKmmlBKDBunbhDN4Pjo0Grxs,910
27
- ocf_data_sampler/numpy_sample/site.py,sha256=PIfmCtPA37dqpC8GArkryVqFrNAwqacj0iW2ikBOdSk,789
27
+ ocf_data_sampler/numpy_sample/site.py,sha256=cOVpFN_EVRD0d4TJtmPdNYcWjiWuWr8eswktC97KR8Q,890
28
28
  ocf_data_sampler/numpy_sample/sun_position.py,sha256=UklhucCxCT6GMlAhCWL6c4cfWrdc1cWgegrYaqUoHOY,1611
29
29
  ocf_data_sampler/select/__init__.py,sha256=E4AJulEbO2K-o0UlG1fgaEteuf_1ZFjHTvrotXSb4YU,332
30
30
  ocf_data_sampler/select/dropout.py,sha256=HCx5Wzk8Oh2Z9vV94Jy-ALJsHtGduwvMaQOleQXp5z0,1142
@@ -33,12 +33,12 @@ ocf_data_sampler/select/find_contiguous_time_periods.py,sha256=q7IaNfX95A3z9XHqb
33
33
  ocf_data_sampler/select/geospatial.py,sha256=4xL-9y674jjoaXeqE52NHCHVfknciE4OEGsZtn9DvP4,4911
34
34
  ocf_data_sampler/select/location.py,sha256=26Y5ZjfFngShBwXieuWSoOA-RLaRzci4TTmcDk3Wg7U,2015
35
35
  ocf_data_sampler/select/select_spatial_slice.py,sha256=WNxwur9Q5oetvogATw8-hNejDuEwrXHzuZIovFDjNJA,11488
36
- ocf_data_sampler/select/select_time_slice.py,sha256=D5P_cSvnv8Qs49K5au7lPxDr9U_VmDn42s5leMzHt0k,6122
36
+ ocf_data_sampler/select/select_time_slice.py,sha256=gFeuAuV2C7DJMHgiTHqjRUXOdfI-iraVF5NIzWhewFQ,5524
37
37
  ocf_data_sampler/select/spatial_slice_for_dataset.py,sha256=3tRrMBXr7s4CnClbVSIq7hpls3H4Y3qYTDwswcxCCCE,1763
38
- ocf_data_sampler/select/time_slice_for_dataset.py,sha256=LMw8KnOCKnPjD0m4UubAWERpaiQtzRKkI2cSh5a0A-M,4335
38
+ ocf_data_sampler/select/time_slice_for_dataset.py,sha256=BFjNwWAzhcb1hpqx7UPi5RF9WWt15owbZp1WB-uGA6Q,4305
39
39
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=nJUa2KzVa84ZoM0PT2AbDz26ennmAYc7M7WJVfypPMs,85
40
40
  ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py,sha256=xxeX4Js9LQpydehi3BS7k9psqkYGzgJuM17uTYux40M,8742
41
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=7gTtXG3DFzs_0XlYK0oleFPT-Gena_NSngcG_FAnY54,15394
41
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=75M0oDstOLyLZBySVIS6fLJSbEjfxcWBlgGP_ewui7s,15334
42
42
  ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=hIbekql64eXsNDFIoEc--GWxwdVWrh2qKegdOi70Bow,874
43
43
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=Qo65qUHtle_bW5tLTYr7empHTRv-lpjvfx_6GNJj3Xg,4371
44
44
  scripts/refactor_site.py,sha256=asZ27hQ4IyXgCCUaFJqcz1ObBNcV2W3ywqHBpSXA_fc,1728
@@ -66,8 +66,8 @@ tests/torch_datasets/conftest.py,sha256=eRCzHE7cxS4AoskExkCGFDBeqItktAYNAdkfpMoF
66
66
  tests/torch_datasets/test_merge_and_fill_utils.py,sha256=ueA0A7gZaWEgNdsU8p3CnKuvSnlleTUjEhSw2HUUROM,1229
67
67
  tests/torch_datasets/test_pvnet_uk_regional.py,sha256=FCiFueeFqrsXe7gWguSjBz5ZeUrvyhGbGw81gaVvkHM,5087
68
68
  tests/torch_datasets/test_site.py,sha256=0tnjgx6z4VlzjoF_V2p3Y2t2Z1d0o_07Vwb-FH_c3tU,4640
69
- ocf_data_sampler-0.0.46.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
70
- ocf_data_sampler-0.0.46.dist-info/METADATA,sha256=S8ScJ8z3O0O5qhgGZmdI0Ugan2Yz4dH0nGj9R8N1sgs,11788
71
- ocf_data_sampler-0.0.46.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
- ocf_data_sampler-0.0.46.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
73
- ocf_data_sampler-0.0.46.dist-info/RECORD,,
69
+ ocf_data_sampler-0.0.48.dist-info/LICENSE,sha256=F-Q3UFCR-BECSocV55BFDpn4YKxve9PKrm-lTt6o_Tg,1073
70
+ ocf_data_sampler-0.0.48.dist-info/METADATA,sha256=Wl37iFa0xFXGRsHNEHLn7WYeXbWvbg8SGgLXcksExQA,11788
71
+ ocf_data_sampler-0.0.48.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
72
+ ocf_data_sampler-0.0.48.dist-info/top_level.txt,sha256=Faob6N6cFdPc5eUpCTYcXgCaNhi4XLLteUL5W5ayYmg,31
73
+ ocf_data_sampler-0.0.48.dist-info/RECORD,,