ocf-data-sampler 0.1.3__tar.gz → 0.1.5__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (85)
  1. {ocf_data_sampler-0.1.3/ocf_data_sampler.egg-info → ocf_data_sampler-0.1.5}/PKG-INFO +1 -1
  2. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/satellite.py +20 -35
  3. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +8 -10
  4. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  5. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/pyproject.toml +1 -1
  6. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/torch_datasets/test_pvnet_uk.py +1 -0
  7. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/LICENSE +0 -0
  8. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/MANIFEST.in +0 -0
  9. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/README.md +0 -0
  10. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/__init__.py +0 -0
  11. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/config/__init__.py +0 -0
  12. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/config/load.py +0 -0
  13. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/config/model.py +0 -0
  14. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/config/save.py +0 -0
  15. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/constants.py +0 -0
  16. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  17. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/__init__.py +0 -0
  18. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/gsp.py +0 -0
  19. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/load_dataset.py +0 -0
  20. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  21. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  22. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  23. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  24. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  25. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  26. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/site.py +0 -0
  27. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/load/utils.py +0 -0
  28. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  29. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  30. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  31. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  32. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  33. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  34. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/site.py +0 -0
  35. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  36. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/sample/__init__.py +0 -0
  37. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/sample/base.py +0 -0
  38. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/sample/site.py +0 -0
  39. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/sample/uk_regional.py +0 -0
  40. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/__init__.py +0 -0
  41. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/dropout.py +0 -0
  42. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  43. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  44. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/geospatial.py +0 -0
  45. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/location.py +0 -0
  46. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  47. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/select_time_slice.py +0 -0
  48. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  49. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
  50. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  51. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  52. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  53. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  54. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler/utils.py +0 -0
  55. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  56. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  57. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler.egg-info/requires.txt +0 -0
  58. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  59. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/scripts/refactor_site.py +0 -0
  60. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/setup.cfg +0 -0
  61. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/__init__.py +0 -0
  62. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/config/test_config.py +0 -0
  63. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/config/test_save.py +0 -0
  64. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/conftest.py +0 -0
  65. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/load/test_load_gsp.py +0 -0
  66. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/load/test_load_nwp.py +0 -0
  67. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/load/test_load_satellite.py +0 -0
  68. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/load/test_load_sites.py +0 -0
  69. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/numpy_sample/test_collate.py +0 -0
  70. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/numpy_sample/test_datetime_features.py +0 -0
  71. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/numpy_sample/test_gsp.py +0 -0
  72. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/numpy_sample/test_nwp.py +0 -0
  73. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/numpy_sample/test_satellite.py +0 -0
  74. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/numpy_sample/test_sun_position.py +0 -0
  75. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/select/test_dropout.py +0 -0
  76. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/select/test_fill_time_periods.py +0 -0
  77. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/select/test_find_contiguous_time_periods.py +0 -0
  78. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/select/test_location.py +0 -0
  79. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/select/test_select_spatial_slice.py +0 -0
  80. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/select/test_select_time_slice.py +0 -0
  81. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/test_sample/test_base.py +0 -0
  82. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/test_sample/test_site_sample.py +0 -0
  83. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/test_sample/test_uk_regional_sample.py +0 -0
  84. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
  85. {ocf_data_sampler-0.1.3 → ocf_data_sampler-0.1.5}/tests/torch_datasets/test_site.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -12,15 +12,20 @@ from ocf_data_sampler.load.utils import (
12
12
 
13
13
 
14
14
  def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
15
- """Helper function to open a zarr from either local or GCP path.
16
-
17
- The local or GCP path may contain wildcard matching (*)
15
+ """Helper function to open a Zarr from either a local or GCP path.
18
16
 
19
17
  Args:
20
- zarr_path: Path to zarr file
18
+ zarr_path: Path to a Zarr file. Wildcards (*) are supported **only** for local paths.
19
+ GCS paths (gs://) **do not support** wildcards.
20
+
21
+ Returns:
22
+ An xarray Dataset containing satellite data.
23
+
24
+ Raises:
25
+ ValueError: If a wildcard (*) is used in a GCS (gs://) path.
21
26
  """
22
27
 
23
- # These kwargs are used if zarr path contains "*"
28
+ # These kwargs are used if the path contains "*"
24
29
  openmf_kwargs = dict(
25
30
  engine="zarr",
26
31
  concat_dim="time",
@@ -29,19 +34,17 @@ def _get_single_sat_data(zarr_path: Path | str) -> xr.Dataset:
29
34
  join="override",
30
35
  )
31
36
 
32
- # Need to generate list of files if using GCP bucket storage
37
+ # Raise an error if a wildcard is used in a GCP path
33
38
  if "gs://" in str(zarr_path) and "*" in str(zarr_path):
34
- result_string = subprocess.run(
35
- f"gsutil ls -d {zarr_path}".split(" "), stdout=subprocess.PIPE
36
- ).stdout.decode("utf-8")
37
- files = result_string.splitlines()
38
-
39
- ds = xr.open_mfdataset(files, **openmf_kwargs)
39
+ raise ValueError("Wildcard (*) paths are not supported for GCP (gs://) URLs.")
40
40
 
41
- elif "*" in str(zarr_path): # Multi-file dataset
41
+ # Handle multi-file dataset for local paths
42
+ if "*" in str(zarr_path):
42
43
  ds = xr.open_mfdataset(zarr_path, **openmf_kwargs)
43
44
  else:
44
45
  ds = xr.open_dataset(zarr_path, engine="zarr", chunks="auto")
46
+
47
+ # Ensure time is unique and sorted
45
48
  ds = ds.drop_duplicates("time").sortby("time")
46
49
 
47
50
  return ds
@@ -53,24 +56,6 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
53
56
  Args:
54
57
  zarr_path: Cloud URL or local path pattern, or list of these. If GCS URL, it must start with
55
58
  'gs://'.
56
-
57
- Example:
58
- With wild cards and GCS path:
59
- ```
60
- zarr_paths = [
61
- "gs://bucket/2020_nonhrv_split_*.zarr",
62
- "gs://bucket/2019_nonhrv_split_*.zarr",
63
- ]
64
- ds = open_sat_data(zarr_paths)
65
- ```
66
- Without wild cards and with local path:
67
- ```
68
- zarr_paths = [
69
- "/data/2020_nonhrv.zarr",
70
- "/data/2019_nonhrv.zarr",
71
- ]
72
- ds = open_sat_data(zarr_paths)
73
- ```
74
59
  """
75
60
 
76
61
  # Open the data
@@ -84,7 +69,7 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
84
69
  else:
85
70
  ds = _get_single_sat_data(zarr_path)
86
71
 
87
- # Rename
72
+ # Rename dimensions
88
73
  ds = ds.rename(
89
74
  {
90
75
  "variable": "channel",
@@ -92,13 +77,13 @@ def open_sat_data(zarr_path: Path | str | list[Path] | list[str]) -> xr.DataArra
92
77
  }
93
78
  )
94
79
 
95
- # Check the timestamps are unique and increasing
80
+ # Check timestamps
96
81
  check_time_unique_increasing(ds.time_utc)
97
82
 
98
- # Make sure the spatial coords are in increasing order
83
+ # Ensure spatial coordinates are sorted
99
84
  ds = make_spatial_coords_increasing(ds, x_coord="x_geostationary", y_coord="y_geostationary")
100
85
 
101
86
  ds = ds.transpose("time_utc", "channel", "x_geostationary", "y_geostationary")
102
-
103
87
  # TODO: should we control the dtype of the DataArray?
88
+
104
89
  return get_xr_data_array_from_xr_dataset(ds)
@@ -84,16 +84,6 @@ def process_and_combine_datasets(
84
84
  )
85
85
  )
86
86
 
87
- # Add coordinate data
88
- # TODO: Do we need all of these?
89
- numpy_modalities.append(
90
- {
91
- GSPSampleKey.gsp_id: location.id,
92
- GSPSampleKey.x_osgb: location.x,
93
- GSPSampleKey.y_osgb: location.y,
94
- }
95
- )
96
-
97
87
  if target_key == 'gsp':
98
88
  # Make sun coords NumpySample
99
89
  datetimes = pd.date_range(
@@ -104,6 +94,14 @@ def process_and_combine_datasets(
104
94
 
105
95
  lon, lat = osgb_to_lon_lat(location.x, location.y)
106
96
 
97
+ numpy_modalities.append(
98
+ {
99
+ GSPSampleKey.gsp_id: location.id,
100
+ GSPSampleKey.x_osgb: location.x,
101
+ GSPSampleKey.y_osgb: location.y,
102
+ }
103
+ )
104
+
107
105
  numpy_modalities.append(
108
106
  make_sun_position_numpy_sample(datetimes, lon, lat, key_prefix=target_key)
109
107
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.1.3"
7
+ version = "0.1.5"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"
@@ -55,6 +55,7 @@ def test_process_and_combine_datasets(pvnet_config_filename):
55
55
  assert "nwp" in sample
56
56
  assert sample["satellite_actual"].shape == (7, 1, 2, 2)
57
57
  assert sample["nwp"]["ukv"]["nwp"].shape == (4, 1, 2, 2)
58
+ assert "gsp_id" in sample
58
59
 
59
60
 
60
61
  def test_compute():