ocf-data-sampler 0.0.49__tar.gz → 0.0.51__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (79)
  1. {ocf_data_sampler-0.0.49/ocf_data_sampler.egg-info → ocf_data_sampler-0.0.51}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/site.py +11 -2
  3. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/select_time_slice.py +7 -36
  4. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/time_slice_for_dataset.py +3 -7
  5. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  6. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/pyproject.toml +1 -1
  7. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/select/test_select_time_slice.py +9 -6
  8. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/LICENSE +0 -0
  9. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/MANIFEST.in +0 -0
  10. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/README.md +0 -0
  11. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/__init__.py +0 -0
  12. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/config/__init__.py +0 -0
  13. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/config/load.py +0 -0
  14. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/config/model.py +0 -0
  15. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/config/save.py +0 -0
  16. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/constants.py +0 -0
  17. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  18. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/__init__.py +0 -0
  19. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/gsp.py +0 -0
  20. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/load_dataset.py +0 -0
  21. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  22. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  23. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  24. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  25. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  26. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  27. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/satellite.py +0 -0
  28. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/site.py +0 -0
  29. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/load/utils.py +0 -0
  30. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  31. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  32. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  33. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  34. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  35. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  36. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  37. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/__init__.py +0 -0
  38. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/dropout.py +0 -0
  39. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  40. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  41. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/geospatial.py +0 -0
  42. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/location.py +0 -0
  43. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  44. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  45. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  46. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk_regional.py +0 -0
  47. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  48. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  49. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  50. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler/utils.py +0 -0
  51. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  52. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  53. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler.egg-info/requires.txt +0 -0
  54. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  55. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/scripts/refactor_site.py +0 -0
  56. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/setup.cfg +0 -0
  57. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/__init__.py +0 -0
  58. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/config/test_config.py +0 -0
  59. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/config/test_save.py +0 -0
  60. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/conftest.py +0 -0
  61. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/load/test_load_gsp.py +0 -0
  62. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/load/test_load_nwp.py +0 -0
  63. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/load/test_load_satellite.py +0 -0
  64. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/load/test_load_sites.py +0 -0
  65. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/numpy_sample/test_collate.py +0 -0
  66. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/numpy_sample/test_datetime_features.py +0 -0
  67. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/numpy_sample/test_gsp.py +0 -0
  68. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/numpy_sample/test_nwp.py +0 -0
  69. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/numpy_sample/test_satellite.py +0 -0
  70. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/numpy_sample/test_sun_position.py +0 -0
  71. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/select/test_dropout.py +0 -0
  72. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/select/test_fill_time_periods.py +0 -0
  73. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/select/test_find_contiguous_time_periods.py +0 -0
  74. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/select/test_location.py +0 -0
  75. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/select/test_select_spatial_slice.py +0 -0
  76. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/torch_datasets/conftest.py +0 -0
  77. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
  78. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/torch_datasets/test_pvnet_uk_regional.py +0 -0
  79. {ocf_data_sampler-0.0.49 → ocf_data_sampler-0.0.51}/tests/torch_datasets/test_site.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.49
3
+ Version: 0.0.51
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -9,11 +9,14 @@ class SiteSampleKey:
9
9
  capacity_kwp = "site_capacity_kwp"
10
10
  time_utc = "site_time_utc"
11
11
  t0_idx = "site_t0_idx"
12
+ id = "site_id"
12
13
  solar_azimuth = "site_solar_azimuth"
13
14
  solar_elevation = "site_solar_elevation"
14
- id = "site_id"
15
+ date_sin = "site_date_sin"
16
+ date_cos = "site_date_cos"
17
+ time_sin = "site_time_sin"
18
+ time_cos = "site_time_cos"
15
19
 
16
- # TODO update to include trig datetime + solar coords
17
20
  def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) -> dict:
18
21
  """Convert from Xarray to NumpySample"""
19
22
 
@@ -23,6 +26,12 @@ def convert_site_to_numpy_sample(da: xr.DataArray, t0_idx: int | None = None) ->
23
26
  SiteSampleKey.capacity_kwp: da.isel(time_utc=0)["capacity_kwp"].values,
24
27
  SiteSampleKey.time_utc: da["time_utc"].values.astype(float),
25
28
  SiteSampleKey.id: da["site_id"].values,
29
+ SiteSampleKey.solar_azimuth: da["solar_azimuth"].values,
30
+ SiteSampleKey.solar_elevation: da["solar_elevation"].values,
31
+ SiteSampleKey.date_sin: da["date_sin"].values,
32
+ SiteSampleKey.date_cos: da["date_cos"].values,
33
+ SiteSampleKey.time_sin: da["time_sin"].values,
34
+ SiteSampleKey.time_cos: da["time_cos"].values,
26
35
  }
27
36
 
28
37
  if t0_idx is not None:
@@ -2,40 +2,14 @@ import xarray as xr
2
2
  import pandas as pd
3
3
  import numpy as np
4
4
 
5
-
6
- def _sel_fillnan(
7
- da: xr.DataArray,
8
- start_dt: pd.Timestamp,
9
- end_dt: pd.Timestamp,
10
- sample_period_duration: pd.Timedelta,
11
- ) -> xr.DataArray:
12
- """Select a time slice from a DataArray, filling missing times with NaNs."""
13
- requested_times = pd.date_range(start_dt, end_dt, freq=sample_period_duration)
14
- return da.reindex(time_utc=requested_times)
15
-
16
-
17
- def _sel_default(
18
- da: xr.DataArray,
19
- start_dt: pd.Timestamp,
20
- end_dt: pd.Timestamp,
21
- sample_period_duration: pd.Timedelta,
22
- ) -> xr.DataArray:
23
- """Select a time slice from a DataArray, without filling missing times."""
24
- return da.sel(time_utc=slice(start_dt, end_dt))
25
-
26
-
27
5
  def select_time_slice(
28
6
  ds: xr.DataArray,
29
7
  t0: pd.Timestamp,
30
8
  interval_start: pd.Timedelta,
31
9
  interval_end: pd.Timedelta,
32
10
  sample_period_duration: pd.Timedelta,
33
- fill_selection: bool = False,
34
11
  ):
35
12
  """Select a time slice from a Dataset or DataArray."""
36
-
37
- _sel = _sel_fillnan if fill_selection else _sel_default
38
-
39
13
  t0_datetime_utc = pd.Timestamp(t0)
40
14
  start_dt = t0_datetime_utc + interval_start
41
15
  end_dt = t0_datetime_utc + interval_end
@@ -43,8 +17,7 @@ def select_time_slice(
43
17
  start_dt = start_dt.ceil(sample_period_duration)
44
18
  end_dt = end_dt.ceil(sample_period_duration)
45
19
 
46
- return _sel(ds, start_dt, end_dt, sample_period_duration)
47
-
20
+ return ds.sel(time_utc=slice(start_dt, end_dt))
48
21
 
49
22
  def select_time_slice_nwp(
50
23
  da: xr.DataArray,
@@ -57,7 +30,6 @@ def select_time_slice_nwp(
57
30
  accum_channels: list[str] = [],
58
31
  channel_dim_name: str = "channel",
59
32
  ):
60
-
61
33
  if dropout_timedeltas is not None:
62
34
  assert all(
63
35
  [t < pd.Timedelta(0) for t in dropout_timedeltas]
@@ -66,8 +38,7 @@ def select_time_slice_nwp(
66
38
  assert 0 <= dropout_frac <= 1
67
39
  consider_dropout = (dropout_timedeltas is not None) and dropout_frac > 0
68
40
 
69
-
70
- # The accumulation and non-accumulation channels
41
+ # The accumulation and non-accumulation channels
71
42
  accum_channels = np.intersect1d(
72
43
  da[channel_dim_name].values, accum_channels
73
44
  )
@@ -100,19 +71,19 @@ def select_time_slice_nwp(
100
71
 
101
72
  # Find the required steps for all target times
102
73
  steps = target_times - selected_init_times
103
-
74
+
104
75
  # We want one timestep for each target_time_hourly (obviously!) If we simply do
105
76
  # nwp.sel(init_time=init_times, step=steps) then we'll get the *product* of
106
77
  # init_times and steps, which is not what we want! Instead, we use xarray's
107
78
  # vectorized-indexing mode by using a DataArray indexer. See the last example here:
108
79
  # https://docs.xarray.dev/en/latest/user-guide/indexing.html#more-advanced-indexing
80
+
109
81
  coords = {"target_time_utc": target_times}
110
82
  init_time_indexer = xr.DataArray(selected_init_times, coords=coords)
111
83
  step_indexer = xr.DataArray(steps, coords=coords)
112
84
 
113
85
  if len(accum_channels) == 0:
114
86
  da_sel = da.sel(step=step_indexer, init_time_utc=init_time_indexer)
115
-
116
87
  else:
117
88
  # First minimise the size of the dataset we are diffing
118
89
  # - find the init times we are slicing from
@@ -136,14 +107,14 @@ def select_time_slice_nwp(
136
107
 
137
108
  # Slice out the channels which need to be diffed
138
109
  da_accum = da_min.sel({channel_dim_name: accum_channels})
139
-
110
+
140
111
  # Take the diff and slice requested data
141
112
  da_accum = da_accum.diff(dim="step", label="lower")
142
113
  da_sel_accum = da_accum.sel(step=step_indexer, init_time_utc=init_time_indexer)
143
114
 
144
115
  # Join diffed and non-diffed variables
145
116
  da_sel = xr.concat([da_sel_non_accum, da_sel_accum], dim=channel_dim_name)
146
-
117
+
147
118
  # Reorder the variable back to the original order
148
119
  da_sel = da_sel.sel({channel_dim_name: da[channel_dim_name].values})
149
120
 
@@ -153,4 +124,4 @@ def select_time_slice_nwp(
153
124
  for v in da_sel[channel_dim_name].values
154
125
  ]
155
126
 
156
- return da_sel
127
+ return da_sel
@@ -6,7 +6,6 @@ from ocf_data_sampler.select.dropout import draw_dropout_time, apply_dropout_tim
6
6
  from ocf_data_sampler.select.select_time_slice import select_time_slice_nwp, select_time_slice
7
7
  from ocf_data_sampler.utils import minutes
8
8
 
9
-
10
9
  def slice_datasets_by_time(
11
10
  datasets_dict: dict,
12
11
  t0: pd.Timestamp,
@@ -23,11 +22,9 @@ def slice_datasets_by_time(
23
22
  sliced_datasets_dict = {}
24
23
 
25
24
  if "nwp" in datasets_dict:
26
-
27
25
  sliced_datasets_dict["nwp"] = {}
28
-
26
+
29
27
  for nwp_key, da_nwp in datasets_dict["nwp"].items():
30
-
31
28
  nwp_config = config.input_data.nwp[nwp_key]
32
29
 
33
30
  sliced_datasets_dict["nwp"][nwp_key] = select_time_slice_nwp(
@@ -42,7 +39,6 @@ def slice_datasets_by_time(
42
39
  )
43
40
 
44
41
  if "sat" in datasets_dict:
45
-
46
42
  sat_config = config.input_data.satellite
47
43
 
48
44
  sliced_datasets_dict["sat"] = select_time_slice(
@@ -76,7 +72,7 @@ def slice_datasets_by_time(
76
72
  interval_start=minutes(gsp_config.time_resolution_minutes),
77
73
  interval_end=minutes(gsp_config.interval_end_minutes),
78
74
  )
79
-
75
+
80
76
  sliced_datasets_dict["gsp"] = select_time_slice(
81
77
  datasets_dict["gsp"],
82
78
  t0,
@@ -96,7 +92,7 @@ def slice_datasets_by_time(
96
92
  sliced_datasets_dict["gsp"],
97
93
  gsp_dropout_time
98
94
  )
99
-
95
+
100
96
  if "site" in datasets_dict:
101
97
  site_config = config.input_data.site
102
98
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.0.49
3
+ Version: 0.0.51
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.0.49"
7
+ version = "0.0.51"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"
@@ -86,11 +86,15 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
86
86
  freq = pd.Timedelta("5min")
87
87
 
88
88
  # The data is available between these times
89
- min_time = da_sat_like.time_utc.min()
90
- max_time = da_sat_like.time_utc.max()
91
-
92
- # Expect to return these timestamps from the selection
93
- expected_datetimes = pd.date_range(t0 + interval_start, t0 + interval_end, freq=freq)
89
+ min_time = pd.Timestamp(da_sat_like.time_utc.min().item())
90
+ max_time = pd.Timestamp(da_sat_like.time_utc.max().item())
91
+
92
+ # Expect to return these timestamps within the requested range
93
+ expected_datetimes = pd.date_range(
94
+ max(t0 + interval_start, min_time),
95
+ min(t0 + interval_end, max_time),
96
+ freq=freq,
97
+ )
94
98
 
95
99
  # Make the partially out of bounds selection
96
100
  sat_sample = select_time_slice(
@@ -99,7 +103,6 @@ def test_select_time_slice_out_of_bounds(da_sat_like, t0_str):
99
103
  interval_start=interval_start,
100
104
  interval_end=interval_end,
101
105
  sample_period_duration=freq,
102
- fill_selection=True
103
106
  )
104
107
 
105
108
  # Check the returned times are as expected