ocf-data-sampler 0.1.6__tar.gz → 0.1.7__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocf-data-sampler might be problematic. Click here for more details.

Files changed (87)
  1. {ocf_data_sampler-0.1.6/ocf_data_sampler.egg-info → ocf_data_sampler-0.1.7}/PKG-INFO +1 -1
  2. ocf_data_sampler-0.1.7/ocf_data_sampler/sample/base.py +75 -0
  3. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7/ocf_data_sampler.egg-info}/PKG-INFO +1 -1
  4. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/pyproject.toml +1 -1
  5. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/test_sample/test_base.py +63 -2
  6. ocf_data_sampler-0.1.6/ocf_data_sampler/sample/base.py +0 -44
  7. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/LICENSE +0 -0
  8. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/MANIFEST.in +0 -0
  9. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/README.md +0 -0
  10. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/__init__.py +0 -0
  11. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/config/__init__.py +0 -0
  12. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/config/load.py +0 -0
  13. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/config/model.py +0 -0
  14. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/config/save.py +0 -0
  15. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/constants.py +0 -0
  16. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/data/uk_gsp_locations.csv +0 -0
  17. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/__init__.py +0 -0
  18. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/gsp.py +0 -0
  19. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/load_dataset.py +0 -0
  20. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/nwp/__init__.py +0 -0
  21. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/nwp/nwp.py +0 -0
  22. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/nwp/providers/__init__.py +0 -0
  23. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/nwp/providers/ecmwf.py +0 -0
  24. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/nwp/providers/ukv.py +0 -0
  25. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/nwp/providers/utils.py +0 -0
  26. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/satellite.py +0 -0
  27. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/site.py +0 -0
  28. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/load/utils.py +0 -0
  29. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/__init__.py +0 -0
  30. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/collate.py +0 -0
  31. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/datetime_features.py +0 -0
  32. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/gsp.py +0 -0
  33. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/nwp.py +0 -0
  34. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/satellite.py +0 -0
  35. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/site.py +0 -0
  36. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/numpy_sample/sun_position.py +0 -0
  37. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/sample/__init__.py +0 -0
  38. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/sample/site.py +0 -0
  39. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/sample/uk_regional.py +0 -0
  40. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/__init__.py +0 -0
  41. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/dropout.py +0 -0
  42. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/fill_time_periods.py +0 -0
  43. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/find_contiguous_time_periods.py +0 -0
  44. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/geospatial.py +0 -0
  45. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/location.py +0 -0
  46. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/select_spatial_slice.py +0 -0
  47. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/select_time_slice.py +0 -0
  48. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/spatial_slice_for_dataset.py +0 -0
  49. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/select/time_slice_for_dataset.py +0 -0
  50. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/torch_datasets/datasets/__init__.py +0 -0
  51. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/torch_datasets/datasets/pvnet_uk.py +0 -0
  52. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/torch_datasets/datasets/site.py +0 -0
  53. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/torch_datasets/utils/merge_and_fill_utils.py +0 -0
  54. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/torch_datasets/utils/valid_time_periods.py +0 -0
  55. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler/utils.py +0 -0
  56. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler.egg-info/SOURCES.txt +0 -0
  57. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler.egg-info/dependency_links.txt +0 -0
  58. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler.egg-info/requires.txt +0 -0
  59. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/ocf_data_sampler.egg-info/top_level.txt +0 -0
  60. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/scripts/refactor_site.py +0 -0
  61. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/setup.cfg +0 -0
  62. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/__init__.py +0 -0
  63. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/config/test_config.py +0 -0
  64. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/config/test_load.py +0 -0
  65. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/config/test_save.py +0 -0
  66. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/conftest.py +0 -0
  67. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/load/test_load_gsp.py +0 -0
  68. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/load/test_load_nwp.py +0 -0
  69. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/load/test_load_satellite.py +0 -0
  70. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/load/test_load_sites.py +0 -0
  71. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/numpy_sample/test_collate.py +0 -0
  72. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/numpy_sample/test_datetime_features.py +0 -0
  73. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/numpy_sample/test_gsp.py +0 -0
  74. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/numpy_sample/test_nwp.py +0 -0
  75. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/numpy_sample/test_satellite.py +0 -0
  76. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/numpy_sample/test_sun_position.py +0 -0
  77. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/select/test_dropout.py +0 -0
  78. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/select/test_fill_time_periods.py +0 -0
  79. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/select/test_find_contiguous_time_periods.py +0 -0
  80. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/select/test_location.py +0 -0
  81. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/select/test_select_spatial_slice.py +0 -0
  82. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/select/test_select_time_slice.py +0 -0
  83. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/test_sample/test_site_sample.py +0 -0
  84. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/test_sample/test_uk_regional_sample.py +0 -0
  85. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/torch_datasets/test_merge_and_fill_utils.py +0 -0
  86. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/torch_datasets/test_pvnet_uk.py +0 -0
  87. {ocf_data_sampler-0.1.6 → ocf_data_sampler-0.1.7}/tests/torch_datasets/test_site.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -0,0 +1,75 @@
1
+ """
2
+ Base class definition - abstract
3
+ Handling of both flat and nested structures - consideration for NWP
4
+ """
5
+
6
+ import logging
7
+ import numpy as np
8
+ import torch
9
+ import xarray as xr
10
+
11
+ from pathlib import Path
12
+ from typing import Any, Dict, Optional, Union, TypeAlias
13
+ from abc import ABC, abstractmethod
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ NumpySample: TypeAlias = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
19
+ NumpyBatch: TypeAlias = Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]
20
+ TensorBatch: TypeAlias = Dict[str, Union[torch.Tensor, Dict[str, torch.Tensor]]]
21
+
22
+
23
+ class SampleBase(ABC):
24
+ """
25
+ Abstract base class for all sample types
26
+ Provides core data storage functionality
27
+ """
28
+
29
+ def __init__(self, data: Optional[Union[NumpySample, xr.Dataset]] = None):
30
+ """ Initialise data container """
31
+ logger.debug("Initialising SampleBase instance")
32
+ self._data = data
33
+
34
+ @abstractmethod
35
+ def to_numpy(self) -> NumpySample:
36
+ """ Convert data to a numpy array representation """
37
+ raise NotImplementedError
38
+
39
+ @abstractmethod
40
+ def plot(self, **kwargs) -> None:
41
+ """ Abstract method for plotting """
42
+ raise NotImplementedError
43
+
44
+ @abstractmethod
45
+ def save(self, path: Union[str, Path]) -> None:
46
+ """ Abstract method for saving sample data """
47
+ raise NotImplementedError
48
+
49
+ @classmethod
50
+ @abstractmethod
51
+ def load(cls, path: Union[str, Path]) -> 'SampleBase':
52
+ """ Abstract class method for loading sample data """
53
+ raise NotImplementedError
54
+
55
+
56
+ def batch_to_tensor(batch: NumpyBatch) -> TensorBatch:
57
+ """
58
+ Moves ndarrays in a nested dict to torch tensors
59
+ Args:
60
+ batch: NumpyBatch with data in numpy arrays
61
+ Returns:
62
+ TensorBatch with data in torch tensors
63
+ """
64
+ if not batch:
65
+ raise ValueError("Cannot convert empty batch to tensors")
66
+
67
+ for k, v in batch.items():
68
+ if isinstance(v, dict):
69
+ batch[k] = batch_to_tensor(v)
70
+ elif isinstance(v, np.ndarray):
71
+ if v.dtype == np.bool_:
72
+ batch[k] = torch.tensor(v, dtype=torch.bool)
73
+ elif np.issubdtype(v.dtype, np.number):
74
+ batch[k] = torch.as_tensor(v)
75
+ return batch
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: ocf_data_sampler
3
- Version: 0.1.6
3
+ Version: 0.1.7
4
4
  Summary: Sample from weather data for renewable energy prediction
5
5
  Author: James Fulton, Peter Dudfield, and the Open Climate Fix team
6
6
  Author-email: info@openclimatefix.org
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocf_data_sampler"
7
- version = "0.1.6"
7
+ version = "0.1.7"
8
8
  license = { file = "LICENSE" }
9
9
  readme = "README.md"
10
10
  description = "Sample from weather data for renewable energy prediction"
@@ -3,11 +3,14 @@ Base class testing - SampleBase
3
3
  """
4
4
 
5
5
  import pytest
6
+ import torch
6
7
  import numpy as np
7
8
 
8
9
  from pathlib import Path
9
- from ocf_data_sampler.sample.base import SampleBase
10
-
10
+ from ocf_data_sampler.sample.base import (
11
+ SampleBase,
12
+ batch_to_tensor
13
+ )
11
14
 
12
15
  class TestSample(SampleBase):
13
16
  """
@@ -84,3 +87,61 @@ def test_sample_base_to_numpy():
84
87
  assert isinstance(numpy_data, dict)
85
88
  assert all(isinstance(value, np.ndarray) for value in numpy_data.values())
86
89
  assert np.array_equal(numpy_data['list_data'], np.array([1, 2, 3]))
90
+
91
+
92
+ def test_batch_to_tensor_nested():
93
+ """ Test nested dictionary conversion """
94
+ batch = {
95
+ 'outer': {
96
+ 'inner': np.array([1, 2, 3])
97
+ }
98
+ }
99
+ tensor_batch = batch_to_tensor(batch)
100
+
101
+ assert torch.equal(tensor_batch['outer']['inner'], torch.tensor([1, 2, 3]))
102
+
103
+
104
+ def test_batch_to_tensor_mixed_types():
105
+ """ Test handling of mixed data types """
106
+ batch = {
107
+ 'tensor_data': np.array([1, 2, 3]),
108
+ 'string_data': 'not_a_tensor',
109
+ 'nested': {
110
+ 'numbers': np.array([4, 5, 6]),
111
+ 'text': 'still_not_a_tensor'
112
+ }
113
+ }
114
+ tensor_batch = batch_to_tensor(batch)
115
+
116
+ assert isinstance(tensor_batch['tensor_data'], torch.Tensor)
117
+ assert isinstance(tensor_batch['string_data'], str)
118
+ assert isinstance(tensor_batch['nested']['numbers'], torch.Tensor)
119
+ assert isinstance(tensor_batch['nested']['text'], str)
120
+
121
+
122
+ def test_batch_to_tensor_different_dtypes():
123
+ """ Test conversion of arrays with different dtypes """
124
+ batch = {
125
+ 'float_data': np.array([1.0, 2.0, 3.0], dtype=np.float32),
126
+ 'int_data': np.array([1, 2, 3], dtype=np.int64),
127
+ 'bool_data': np.array([True, False, True], dtype=np.bool_)
128
+ }
129
+ tensor_batch = batch_to_tensor(batch)
130
+
131
+ assert isinstance(tensor_batch['bool_data'], torch.Tensor)
132
+ assert tensor_batch['float_data'].dtype == torch.float32
133
+ assert tensor_batch['int_data'].dtype == torch.int64
134
+ assert tensor_batch['bool_data'].dtype == torch.bool
135
+
136
+
137
+ def test_batch_to_tensor_multidimensional():
138
+ """ Test conversion of multidimensional arrays """
139
+ batch = {
140
+ 'matrix': np.array([[1, 2], [3, 4]]),
141
+ 'tensor': np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
142
+ }
143
+ tensor_batch = batch_to_tensor(batch)
144
+
145
+ assert tensor_batch['matrix'].shape == (2, 2)
146
+ assert tensor_batch['tensor'].shape == (2, 2, 2)
147
+ assert torch.equal(tensor_batch['matrix'], torch.tensor([[1, 2], [3, 4]]))
@@ -1,44 +0,0 @@
1
- """
2
- Base class definition - abstract
3
- Handling of both flat and nested structures - consideration for NWP
4
- """
5
-
6
- import logging
7
- import numpy as np
8
-
9
- from pathlib import Path
10
- from typing import Any, Dict, Optional, Union
11
- from abc import ABC, abstractmethod
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
- class SampleBase(ABC):
16
- """
17
- Abstract base class for all sample types
18
- Provides core data storage functionality
19
- """
20
-
21
- def __init__(self):
22
- """ Initialise data container """
23
- logger.debug("Initialising SampleBase instance")
24
-
25
- @abstractmethod
26
- def to_numpy(self) -> Dict[str, Any]:
27
- """ Convert data to a numpy array representation """
28
- raise NotImplementedError
29
-
30
- @abstractmethod
31
- def plot(self, **kwargs) -> None:
32
- """ Abstract method for plotting """
33
- raise NotImplementedError
34
-
35
- @abstractmethod
36
- def save(self, path: Union[str, Path]) -> None:
37
- """ Abstract method for saving sample data """
38
- raise NotImplementedError
39
-
40
- @classmethod
41
- @abstractmethod
42
- def load(cls, path: Union[str, Path]) -> 'SampleBase':
43
- """ Abstract class method for loading sample data """
44
- raise NotImplementedError