ocf-data-sampler 0.5.34__py3-none-any.whl → 0.5.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

@@ -93,6 +93,11 @@ class DropoutMixin(Base):
93
93
  "floats (probability that dropout of the corresponding timedelta is applied)",
94
94
  )
95
95
 
96
+ dropout_value: float = Field(
97
+ default=0.0,
98
+ description="The value to use for dropped out values. "
99
+ "Idea is to use -1, but to be backwards comptaible we've put the default as 0")
100
+
96
101
  @field_validator("dropout_timedeltas_minutes")
97
102
  def dropout_timedeltas_minutes_negative(cls, v: list[int]) -> list[int]:
98
103
  """Validate 'dropout_timedeltas_minutes'."""
@@ -206,7 +206,7 @@ class AbstractPVNetUKDataset(PickleCacheMixin, Dataset):
206
206
 
207
207
  # Combine all the modalities and fill NaNs
208
208
  combined_sample = merge_dicts(numpy_modalities)
209
- combined_sample = fill_nans_in_arrays(combined_sample)
209
+ combined_sample = fill_nans_in_arrays(combined_sample, config=self.config)
210
210
 
211
211
  return combined_sample
212
212
 
@@ -139,7 +139,7 @@ def process_and_combine_datasets(
139
139
 
140
140
  # Combine all the modalities and fill NaNs
141
141
  combined_sample = merge_dicts(numpy_modalities)
142
- combined_sample = fill_nans_in_arrays(combined_sample)
142
+ combined_sample = fill_nans_in_arrays(combined_sample, config=config)
143
143
 
144
144
  return combined_sample
145
145
 
@@ -330,7 +330,12 @@ class SitesDatasetConcurrent(PickleCacheMixin, Dataset):
330
330
  self.config = config
331
331
 
332
332
  # get all locations
333
- self.locations = get_locations(datasets_dict["site"])
333
+ locations = get_locations(datasets_dict["site"])
334
+ self.locations = add_alterate_coordinate_projections(
335
+ locations,
336
+ datasets_dict,
337
+ primary_coords="lon_lat",
338
+ )
334
339
 
335
340
  # Get t0 times where all input data is available
336
341
  valid_t0s = self.find_valid_t0s(datasets_dict)
@@ -2,6 +2,12 @@
2
2
 
3
3
  import numpy as np
4
4
 
5
+ from ocf_data_sampler.config.model import Configuration
6
+ from ocf_data_sampler.numpy_sample.gsp import GSPSampleKey
7
+ from ocf_data_sampler.numpy_sample.nwp import NWPSampleKey
8
+ from ocf_data_sampler.numpy_sample.satellite import SatelliteSampleKey
9
+ from ocf_data_sampler.numpy_sample.site import SiteSampleKey
10
+
5
11
 
6
12
  def merge_dicts(list_of_dicts: list[dict]) -> dict:
7
13
  """Merge a list of dictionaries into a single dictionary."""
@@ -12,18 +18,33 @@ def merge_dicts(list_of_dicts: list[dict]) -> dict:
12
18
  return combined_dict
13
19
 
14
20
 
15
- def fill_nans_in_arrays(sample: dict) -> dict:
16
- """Fills all NaN values in each np.ndarray in the sample dictionary with zeros.
21
+ def fill_nans_in_arrays(
22
+ sample: dict, config: Configuration | None = None, nwp_provider: str | None = None,
23
+ ) -> dict:
24
+ """Fills all NaN values in each np.ndarray in the sample dictionary.
17
25
 
18
26
  Operation is performed in-place on the sample.
27
+ By default a fill value of 0.0 is used, but if a config is provided,
28
+ it can use the configured dropout values.
19
29
  """
20
30
  for k, v in sample.items():
21
31
  if isinstance(v, np.ndarray) and np.issubdtype(v.dtype, np.number):
22
32
  if np.isnan(v).any():
23
- sample[k] = np.nan_to_num(v, copy=False, nan=0.0)
33
+ fill_value = 0.0
34
+ if config is not None:
35
+ if k == GSPSampleKey.gsp:
36
+ fill_value = config.input_data.gsp.dropout_value
37
+ elif k == SiteSampleKey.generation:
38
+ fill_value = config.input_data.site.dropout_value
39
+ elif k == SatelliteSampleKey.satellite_actual:
40
+ fill_value = config.input_data.satellite.dropout_value
41
+ elif k == NWPSampleKey.nwp and nwp_provider in config.input_data.nwp:
42
+ fill_value = config.input_data.nwp[nwp_provider].dropout_value
43
+
44
+ sample[k] = np.nan_to_num(v, copy=False, nan=fill_value)
24
45
 
25
46
  # Recursion is included to reach NWP arrays in subdict
26
47
  elif isinstance(v, dict):
27
- fill_nans_in_arrays(v)
48
+ fill_nans_in_arrays(v, config=config, nwp_provider=k)
28
49
 
29
50
  return sample
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ocf-data-sampler
3
- Version: 0.5.34
3
+ Version: 0.5.36
4
4
  Author: James Fulton, Peter Dudfield
5
5
  Author-email: Open Climate Fix team <info@openclimatefix.org>
6
6
  License: MIT License
@@ -2,7 +2,7 @@ ocf_data_sampler/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,
2
2
  ocf_data_sampler/utils.py,sha256=WfmyBacjFGsv_IlUHRezNGc4ixi4wBvom_JF76iJYbY,1487
3
3
  ocf_data_sampler/config/__init__.py,sha256=O29mbH0XG2gIY1g3BaveGCnpBO2SFqdu-qzJ7a6evl0,223
4
4
  ocf_data_sampler/config/load.py,sha256=LL-7wemI8o4KPkx35j-wQ3HjsMvDgqXr7G46IcASfnU,632
5
- ocf_data_sampler/config/model.py,sha256=3aaMZIITDOQ6IGeod9xOYGOzft5zNVfEVBWbmWM50nA,11843
5
+ ocf_data_sampler/config/model.py,sha256=U1AdoK85DFM_H_5ejiiAA_94aYU6ZZdj2uz7P1Uv6MU,12051
6
6
  ocf_data_sampler/config/save.py,sha256=m8SPw5rXjkMm1rByjh3pK5StdBi4e8ysnn3jQopdRaI,1064
7
7
  ocf_data_sampler/data/uk_gsp_locations_20220314.csv,sha256=RSh7DRh55E3n8lVAaWXGTaXXHevZZtI58td4d4DhGos,10415772
8
8
  ocf_data_sampler/data/uk_gsp_locations_20250109.csv,sha256=XZISFatnbpO9j8LwaxNKFzQSjs6hcHFsV8a9uDDpy2E,9055334
@@ -42,8 +42,8 @@ ocf_data_sampler/select/select_spatial_slice.py,sha256=Nov6foM5xPkAREsEAHHAak8jH
42
42
  ocf_data_sampler/select/select_time_slice.py,sha256=cpkdovJMvcjxSGfq9G0OJK5aDAeCXg7exWYrJnR4N2w,4116
43
43
  ocf_data_sampler/torch_datasets/datasets/__init__.py,sha256=o0SsEXXZ6k9iL__5_RN1Sf60lw_eqK91P3UFEHAD2k0,102
44
44
  ocf_data_sampler/torch_datasets/datasets/picklecache.py,sha256=b8T5lgKfiPXLwuVQuFpCQBlU-HNBrA-Z-eSwYICKvsQ,1350
45
- ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=Tpf1zVPtBy-815n__-zgsTeNRxlxjLwuu6UgCCyIEL0,12365
46
- ocf_data_sampler/torch_datasets/datasets/site.py,sha256=F3emPejGQNtm3qzWtIGSFOVVH9l3DxS9YkiSATXROHQ,15095
45
+ ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py,sha256=TDSraKUYE2wA5YF9SYO2RmAIbDJtcvfTtxr7WmYDszg,12385
46
+ ocf_data_sampler/torch_datasets/datasets/site.py,sha256=IFMVy8c887mfT2NmcKX8ocm1OvlBhvzJNye_scfBMFQ,15265
47
47
  ocf_data_sampler/torch_datasets/sample/__init__.py,sha256=GL84vdZl_SjHDGVyh9Uekx2XhPYuZ0dnO3l6f6KXnHI,100
48
48
  ocf_data_sampler/torch_datasets/sample/base.py,sha256=cQ1oIyhdmlotejZK8B3Cw6MNvpdnBPD8G_o2h7Ye4Vc,2206
49
49
  ocf_data_sampler/torch_datasets/sample/site.py,sha256=40NwNTqjL1WVhPdwe02zDHHfDLG2u_bvCfRCtGAtFc0,1466
@@ -52,14 +52,14 @@ ocf_data_sampler/torch_datasets/utils/__init__.py,sha256=4l1VcEmxHInU9G66zrimNMa
52
52
  ocf_data_sampler/torch_datasets/utils/add_alterate_coordinate_projections.py,sha256=w6Q4TyxNyl7PKAbhqiXvqOpnqIjwmOUcGREIvPNGYlQ,2666
53
53
  ocf_data_sampler/torch_datasets/utils/config_normalization_values_to_dicts.py,sha256=SGt1H2nXcaj44ND14-gHzvA7dkLfgjTacCq7rOkRGwg,1991
54
54
  ocf_data_sampler/torch_datasets/utils/diff_nwp_data.py,sha256=o7NpKWxKHhwMbol3xBAF087-tDgDUZeP0j8vG08E7Nc,816
55
- ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=we7BTxRH7B7jKayDT7YfNyfI3zZClz2Bk-HXKQIokgU,956
55
+ ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py,sha256=VkQv4wJVihObh_OiSuwKqV_w2lEOweaYgJPkm075CZc,2132
56
56
  ocf_data_sampler/torch_datasets/utils/spatial_slice_for_dataset.py,sha256=Hvz0wHSWMYYamf2oHNiGlzJcM4cAH6pL_7ZEvIBL2dE,1882
57
57
  ocf_data_sampler/torch_datasets/utils/time_slice_for_dataset.py,sha256=1r1J2KNSo1_imN9gpVf5AupJaZ7VSnSevS1o_wck440,3925
58
58
  ocf_data_sampler/torch_datasets/utils/valid_time_periods.py,sha256=xcy75cVxl0WrglnX5YUAFjXXlO2GwEBHWyqo8TDuiOA,4714
59
59
  ocf_data_sampler/torch_datasets/utils/validation_utils.py,sha256=YqmT-lExWlI8_ul3l0EP73Ik002fStr_bhsZh9mQqEU,4735
60
60
  scripts/download_gsp_location_data.py,sha256=rRDXMoqX-RYY4jPdxhdlxJGhWdl6r245F5UARgKV6P4,3121
61
61
  scripts/refactor_site.py,sha256=skzvsPP0Cn9yTKndzkilyNcGz4DZ88ctvCJ0XrBdc2A,3135
62
- ocf_data_sampler-0.5.34.dist-info/METADATA,sha256=VVnKoyWCmUnQmBBaSsUvT-OhFD1Ogd3-HGSh-jyRRps,13541
63
- ocf_data_sampler-0.5.34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
- ocf_data_sampler-0.5.34.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
65
- ocf_data_sampler-0.5.34.dist-info/RECORD,,
62
+ ocf_data_sampler-0.5.36.dist-info/METADATA,sha256=OgFiRyisHUXi5zZoScJUxJkoAF9OivTYIaEBotOlwXs,13541
63
+ ocf_data_sampler-0.5.36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
64
+ ocf_data_sampler-0.5.36.dist-info/top_level.txt,sha256=deUxqmsONNAGZDNbsntbXH7BRA1MqWaUeAJrCo6q_xA,25
65
+ ocf_data_sampler-0.5.36.dist-info/RECORD,,