anemoi-datasets 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (155)
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +10 -7
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +1 -1
  132. anemoi/datasets/create/functions/__init__.py +0 -66
  133. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  134. anemoi/datasets/create/functions/filters/empty.py +0 -17
  135. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  136. anemoi/datasets/create/functions/filters/rename.py +0 -79
  137. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  138. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  139. anemoi/datasets/create/functions/sources/empty.py +0 -15
  140. anemoi/datasets/create/functions/sources/grib.py +0 -150
  141. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  142. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  143. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  144. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  145. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  146. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  147. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  148. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  149. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  150. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  151. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  152. anemoi/datasets/utils/fields.py +0 -47
  153. anemoi_datasets-0.5.15.dist-info/RECORD +0 -129
  154. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  155. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
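
Note on the layout change visible in the renames above: the creation plugins under anemoi/datasets/create/functions/filters and anemoi/datasets/create/functions/sources moved up to anemoi/datasets/create/filters and anemoi/datasets/create/sources. A minimal sketch of what this implies for imports, assuming these modules are meant to be imported directly (module names are taken from the file list; the package's public API may differ):

    # Import paths implied by the renames listed above (illustrative only).
    # 0.5.15:
    #   from anemoi.datasets.create.functions.filters import rename
    #   from anemoi.datasets.create.functions.sources import mars
    # 0.5.17:
    from anemoi.datasets.create.filters import rename
    from anemoi.datasets.create.sources import mars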
@@ -8,13 +8,24 @@
 # nor does it submit to any jurisdiction.
 
 
+import datetime
 import logging
 import warnings
+from abc import abstractmethod
 from functools import cached_property
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Optional
+from typing import Set
 
 import numpy as np
+from numpy.typing import NDArray
 
 from .dataset import Dataset
+from .dataset import FullIndex
+from .dataset import Shape
+from .dataset import TupleIndex
 from .debug import debug_indexing
 from .indexing import apply_index_to_slices_changes
 from .indexing import expand_list_indexing
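
The hunk above adds typing imports, including FullIndex, Shape and TupleIndex from .dataset. Their definitions are not part of this diff; a plausible sketch, inferred from the docstrings further down ("Union[int, slice, Tuple[Union[int, slice], ...]]") and labeled as an assumption:

    # Assumed shape of the aliases imported from .dataset (not shown in this diff).
    from typing import Tuple, Union

    Shape = Tuple[int, ...]                      # a dataset shape
    TupleIndex = Tuple[Union[int, slice], ...]   # a multi-dimensional index
    FullIndex = Union[int, slice, TupleIndex]    # anything __getitem__ accepts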
@@ -26,109 +37,225 @@ LOG = logging.getLogger(__name__)
 
 
 class Forwards(Dataset):
-    def __init__(self, forward):
+    """A class to represent a dataset that forwards its properties and methods to another dataset."""
+
+    def __init__(self, forward: Dataset) -> None:
+        """Initializes a Forwards object.
+
+        Parameters
+        ----------
+        forward : Dataset
+            The forward dataset.
+        """
         self.forward = forward.mutate()
 
-    def __len__(self):
+    def __len__(self) -> int:
+        """Returns the length of the forward dataset.
+
+        Returns
+        -------
+        int
+            Length of the forward dataset.
+        """
         return len(self.forward)
 
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Retrieves data from the forward dataset based on the given index.
+
+        Parameters
+        ----------
+        n : Index
+            Index specifying the data to retrieve.
+
+        Returns
+        -------
+        Any
+            Data from the forward dataset based on the index.
+        """
         return self.forward[n]
 
     @property
-    def name(self):
+    def name(self) -> Optional[str]:
+        """Returns the name of the forward dataset."""
         if self._name is not None:
             return self._name
         return self.forward.name
 
     @property
-    def dates(self):
+    def dates(self) -> NDArray[np.datetime64]:
+        """Returns the dates of the forward dataset."""
         return self.forward.dates
 
     @property
-    def resolution(self):
+    def resolution(self) -> str:
+        """Returns the resolution of the forward dataset."""
         return self.forward.resolution
 
     @property
-    def field_shape(self):
+    def field_shape(self) -> Shape:
+        """Returns the field shape of the forward dataset."""
         return self.forward.field_shape
 
     @property
-    def frequency(self):
+    def frequency(self) -> datetime.timedelta:
+        """Returns the frequency of the forward dataset."""
         return self.forward.frequency
 
     @property
-    def latitudes(self):
+    def latitudes(self) -> NDArray[Any]:
+        """Returns the latitudes of the forward dataset."""
         return self.forward.latitudes
 
     @property
-    def longitudes(self):
+    def longitudes(self) -> NDArray[Any]:
+        """Returns the longitudes of the forward dataset."""
         return self.forward.longitudes
 
     @property
-    def name_to_index(self):
+    def name_to_index(self) -> Dict[str, int]:
+        """Returns a dictionary mapping variable names to their indices."""
         return self.forward.name_to_index
 
     @property
-    def variables(self):
+    def variables(self) -> List[str]:
+        """Returns the variables of the forward dataset."""
         return self.forward.variables
 
     @property
-    def variables_metadata(self):
+    def variables_metadata(self) -> Dict[str, Any]:
+        """Returns the metadata of the variables in the forward dataset."""
         return self.forward.variables_metadata
 
     @property
-    def statistics(self):
+    def statistics(self) -> Dict[str, NDArray[Any]]:
+        """Returns the statistics of the forward dataset."""
         return self.forward.statistics
 
-    def statistics_tendencies(self, delta=None):
+    def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
+        """Returns the statistics tendencies of the forward dataset.
+
+        Parameters
+        ----------
+        delta : Optional[Any]
+            Time delta for calculating tendencies.
+
+        Returns
+        -------
+        Any
+            Statistics tendencies of the forward dataset.
+        """
         if delta is None:
             delta = self.frequency
         return self.forward.statistics_tendencies(delta)
 
     @property
-    def shape(self):
+    def shape(self) -> Shape:
+        """Returns the shape of the forward dataset."""
         return self.forward.shape
 
     @property
-    def dtype(self):
+    def dtype(self) -> Any:
+        """Returns the data type of the forward dataset."""
         return self.forward.dtype
 
     @property
-    def missing(self):
+    def missing(self) -> Set[int]:
+        """Returns the missing data information of the forward dataset."""
         return self.forward.missing
 
     @property
-    def grids(self):
+    def grids(self) -> Any:
+        """Returns the grids of the forward dataset."""
         return self.forward.grids
 
-    def metadata_specific(self, **kwargs):
+    def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+        """Returns metadata specific to the forward dataset.
+
+        Parameters
+        ----------
+        **kwargs : Any
+            Additional keyword arguments.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Metadata specific to the forward dataset.
+        """
         return super().metadata_specific(
             forward=self.forward.metadata_specific(),
-            **self.subclass_metadata_specific(),
+            **self.forwards_subclass_metadata_specific(),
             **kwargs,
         )
 
-    def collect_supporting_arrays(self, collected, *path):
+    def collect_supporting_arrays(self, collected: List[Any], *path: Any) -> None:
+        """Collects supporting arrays from the forward dataset.
+
+        Parameters
+        ----------
+        collected : List[Any]
+            List to which the supporting arrays are appended.
+        *path : Any
+            Variable length argument list specifying the paths for the arrays.
+        """
         self.forward.collect_supporting_arrays(collected, *path)
 
-    def collect_input_sources(self, collected):
+    def collect_input_sources(self, collected: List[Any]) -> None:
+        """Collects input sources from the forward dataset.
+
+        Parameters
+        ----------
+        collected : List[Any]
+            List to which the input sources are appended.
+        """
         self.forward.collect_input_sources(collected)
 
-    def source(self, index):
+    def source(self, index: int) -> Any:
+        """Returns the source of the data at the specified index.
+
+        Parameters
+        ----------
+        index : int
+            Index specifying the data source.
+
+        Returns
+        -------
+        Any
+            Source of the data at the specified index.
+        """
         return self.forward.source(index)
 
-    def subclass_metadata_specific(self):
-        raise NotImplementedError(
-            f"subclass_metadata_specific() must be implemented in derived class {self.__class__.__name__}"
-        )
+    @abstractmethod
+    def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
+        """Returns metadata specific to the subclass."""
+        pass
 
-    def get_dataset_names(self, names):
+    def get_dataset_names(self, names: Set[str]) -> None:
+        """Collects the names of the datasets.
+
+        Parameters
+        ----------
+        names : set
+            Set to which the dataset names are added.
+        """
         self.forward.get_dataset_names(names)
 
+    @property
+    def constant_fields(self) -> List[str]:
+        """Returns the constant fields of the forward dataset."""
+        return self.forward.constant_fields
+
 
 class Combined(Forwards):
-    def __init__(self, datasets):
+    """A class to combine multiple datasets into a single dataset."""
+
+    def __init__(self, datasets: List[Dataset]) -> None:
+        """Initializes a Combined object.
+
+        Parameters
+        ----------
+        datasets : List[Dataset]
+            List of datasets to be combined.
+        """
         self.datasets = datasets
         assert len(self.datasets) > 1, len(self.datasets)
 
@@ -138,44 +265,165 @@ class Combined(Forwards):
         # Forward most properties to the first dataset
         super().__init__(datasets[0])
 
-    def mutate(self):
+    def mutate(self) -> Dataset:
+        """Returns the mutated dataset.
+
+        Returns
+        -------
+        Dataset
+            Mutated dataset.
+        """
         return self
 
-    def check_same_resolution(self, d1, d2):
+    def check_same_resolution(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the resolutions of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the resolutions are not the same.
+        """
         if d1.resolution != d2.resolution:
             raise ValueError(f"Incompatible resolutions: {d1.resolution} and {d2.resolution} ({d1} {d2})")
 
-    def check_same_frequency(self, d1, d2):
+    def check_same_frequency(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the frequencies of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the frequencies are not the same.
+        """
         if d1.frequency != d2.frequency:
             raise ValueError(f"Incompatible frequencies: {d1.frequency} and {d2.frequency} ({d1} {d2})")
 
-    def check_same_grid(self, d1, d2):
+    def check_same_grid(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the grids of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the grids are not the same.
+        """
         if (d1.latitudes != d2.latitudes).any() or (d1.longitudes != d2.longitudes).any():
             raise ValueError(f"Incompatible grid ({d1} {d2})")
 
-    def check_same_shape(self, d1, d2):
+    def check_same_shape(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the shapes of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the shapes are not the same.
+        """
         if d1.shape[1:] != d2.shape[1:]:
             raise ValueError(f"Incompatible shapes: {d1.shape} and {d2.shape} ({d1} {d2})")
 
         if d1.variables != d2.variables:
             raise ValueError(f"Incompatible variables: {d1.variables} and {d2.variables} ({d1} {d2})")
 
-    def check_same_sub_shapes(self, d1, d2, drop_axis):
+    def check_same_sub_shapes(self, d1: Any, d2: Any, drop_axis: int) -> None:
+        """Checks if the sub-shapes of two datasets are the same along a given axis.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+        drop_axis : int
+            Axis along which to check the sub-shapes.
+
+        Raises
+        ------
+        ValueError
+            If the sub-shapes are not the same.
+        """
         shape1 = d1.sub_shape(drop_axis)
         shape2 = d2.sub_shape(drop_axis)
 
         if shape1 != shape2:
             raise ValueError(f"Incompatible shapes: {d1.shape} and {d2.shape} ({d1} {d2})")
 
-    def check_same_variables(self, d1, d2):
+    def check_same_variables(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the variables of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the variables are not the same.
+        """
         if d1.variables != d2.variables:
             raise ValueError(f"Incompatible variables: {d1.variables} and {d2.variables} ({d1} {d2})")
 
-    def check_same_lengths(self, d1, d2):
+    def check_same_lengths(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the lengths of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the lengths are not the same.
+        """
         if d1._len != d2._len:
             raise ValueError(f"Incompatible lengths: {d1._len} and {d2._len}")
 
-    def check_same_dates(self, d1, d2):
+    def check_same_dates(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if the dates of two datasets are the same.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the dates are not the same.
+        """
         self.check_same_frequency(d1, d2)
 
         if d1.dates[0] != d2.dates[0]:
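
Each check_same_* helper above follows the same contract: compare one property of the two datasets and raise ValueError with a descriptive message on mismatch. A self-contained sketch of that contract, using a stand-in object rather than a real Dataset:

    import datetime

    class _Dummy:
        # Stand-in for a Dataset; only the attribute under test is needed.
        def __init__(self, frequency):
            self.frequency = frequency

    def check_same_frequency(d1, d2):
        # Same logic as Combined.check_same_frequency above.
        if d1.frequency != d2.frequency:
            raise ValueError(f"Incompatible frequencies: {d1.frequency} and {d2.frequency} ({d1} {d2})")

    check_same_frequency(_Dummy(datetime.timedelta(hours=6)), _Dummy(datetime.timedelta(hours=6)))  # passes
    # With hours=6 and hours=12 it raises:
    # ValueError: Incompatible frequencies: 6:00:00 and 12:00:00 (...)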
@@ -184,7 +432,21 @@ class Combined(Forwards):
         if d1.dates[-1] != d2.dates[-1]:
             raise ValueError(f"Incompatible end dates: {d1.dates[-1]} and {d2.dates[-1]} ({d1} {d2})")
 
-    def check_compatibility(self, d1, d2):
+    def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if two datasets are compatible.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the datasets are not compatible.
+        """
         # These are the default checks
         # Derived classes should turn individual checks off if they are not needed
         self.check_same_resolution(d1, d2)
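
Note the rename in this file: the hook subclass_metadata_specific() becomes forwards_subclass_metadata_specific() and is now declared with @abstractmethod instead of raising NotImplementedError at call time. A minimal sketch of the new contract for a Forwards subclass (MyView is illustrative, not a class from the package):

    from typing import Any, Dict

    from anemoi.datasets.data.forwards import Forwards

    class MyView(Forwards):
        # Required under 0.5.17: implement the renamed abstract hook so that
        # metadata_specific() can merge in this wrapper's own metadata.
        def forwards_subclass_metadata_specific(self) -> Dict[str, Any]:
            return {"my_option": True}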
@@ -194,14 +456,40 @@
         self.check_same_variables(d1, d2)
         self.check_same_dates(d1, d2)
 
-    def provenance(self):
+    def provenance(self) -> List[Any]:
+        """Returns the provenance of the combined datasets.
+
+        Returns
+        -------
+        List[Any]
+            Provenance of the combined datasets.
+        """
         return [d.provenance() for d in self.datasets]
 
-    def __repr__(self):
+    def __repr__(self) -> str:
+        """Returns a string representation of the Combined object.
+
+        Returns
+        -------
+        str
+            String representation of the Combined object.
+        """
         lst = ", ".join(repr(d) for d in self.datasets)
         return f"{self.__class__.__name__}({lst})"
 
-    def metadata_specific(self, **kwargs):
+    def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
+        """Returns metadata specific to the combined datasets.
+
+        Parameters
+        ----------
+        **kwargs : Any
+            Additional keyword arguments.
+
+        Returns
+        -------
+        Any
+            Metadata specific to the combined datasets.
+        """
         # We need to skip the forward superclass
         # TODO: revisit this
         return Dataset.metadata_specific(
@@ -210,25 +498,57 @@
             **kwargs,
         )
 
-    def collect_supporting_arrays(self, collected, *path):
+    def collect_supporting_arrays(self, collected: List[Any], *path: Any) -> None:
+        """Collects supporting arrays from the combined datasets.
+
+        Parameters
+        ----------
+        collected : List[Any]
+            List to which the supporting arrays are appended.
+        *path : Any
+            Variable length argument list specifying the paths for the arrays.
+        """
         warnings.warn(f"The behaviour of {self.__class__.__name__}.collect_supporting_arrays() is not well defined")
         for i, d in enumerate(self.datasets):
             name = d.name if d.name is not None else i
             d.collect_supporting_arrays(collected, *path, name)
 
     @property
-    def missing(self):
+    def missing(self) -> Set[int]:
+        """Returns the missing data information of the combined datasets.
+
+        Raises
+        ------
+        NotImplementedError
+            If the method is not implemented for Combined.
+        """
         raise NotImplementedError("missing() not implemented for Combined")
 
-    def get_dataset_names(self, names):
+    def get_dataset_names(self, names: Set[str]) -> None:
+        """Collects the names of the combined datasets.
+
+        Parameters
+        ----------
+        names : set
+            Set to which the dataset names are added.
+        """
         for d in self.datasets:
             d.get_dataset_names(names)
 
 
 class GivenAxis(Combined):
-    """Given a given axis, combine the datasets along that axis."""
-
-    def __init__(self, datasets, axis):
+    """A class to combine datasets along a given axis."""
+
+    def __init__(self, datasets: List[Any], axis: int) -> None:
+        """Initializes a GivenAxis object.
+
+        Parameters
+        ----------
+        datasets : List[Any]
+            List of datasets to be combined.
+        axis : int
+            Axis along which to combine the datasets.
+        """
         self.axis = axis
         super().__init__(datasets)
 
@@ -237,12 +557,27 @@
             self.datasets[0].shape,
         )
 
-    def check_compatibility(self, d1, d2):
+    def check_compatibility(self, d1: Dataset, d2: Dataset) -> None:
+        """Checks if two datasets are compatible along the given axis.
+
+        Parameters
+        ----------
+        d1 : Any
+            First dataset.
+        d2 : Any
+            Second dataset.
+
+        Raises
+        ------
+        ValueError
+            If the datasets are not compatible along the given axis.
+        """
         super().check_compatibility(d1, d2)
         self.check_same_sub_shapes(d1, d2, drop_axis=self.axis)
 
     @cached_property
-    def shape(self):
+    def shape(self) -> Shape:
+        """Returns the shape of the combined dataset along the given axis."""
         shapes = [d.shape for d in self.datasets]
         before = shapes[0][: self.axis]
         after = shapes[0][self.axis + 1 :]
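
The shape property above rebuilds the combined shape from the first dataset's shape; the replacement of the size along self.axis falls outside this hunk, but summing the per-dataset sizes is the natural reading. A worked example under that assumption:

    # Worked example of GivenAxis.shape (the sum along `axis` is assumed,
    # as that part of the method is outside the hunk shown above).
    shapes = [(10, 4, 1, 40320), (10, 3, 1, 40320)]  # two datasets, axis=1
    axis = 1
    before = shapes[0][:axis]                          # (10,)
    after = shapes[0][axis + 1:]                       # (1, 40320)
    combined = before + (sum(s[axis] for s in shapes),) + after
    assert combined == (10, 7, 1, 40320)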
@@ -252,7 +587,19 @@
 
     @debug_indexing
     @expand_list_indexing
-    def _get_tuple(self, index):
+    def _get_tuple(self, index: TupleIndex) -> NDArray[Any]:
+        """Retrieves data from the combined dataset based on the given index.
+
+        Parameters
+        ----------
+        index : Union[int, slice, Tuple[Union[int, slice], ...]]
+            Index specifying the data to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Data from the combined dataset based on the index.
+        """
         index, changes = index_to_slices(index, self.shape)
         lengths = [d.shape[self.axis] for d in self.datasets]
         slices = length_to_slices(index[self.axis], lengths)
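
_get_tuple above delegates the splitting of the requested range along self.axis to length_to_slices() from .indexing, which is not part of this diff. An illustration of the idea in plain Python, assuming it maps a global slice to one slice per constituent dataset (the real helper's semantics may differ):

    # Illustrative only: split a global slice along the concatenation axis
    # into per-dataset slices (None where a dataset contributes nothing).
    def split_slice(s, lengths):
        out, offset = [], 0
        start, stop, _ = s.indices(sum(lengths))
        for n in lengths:
            lo, hi = max(start - offset, 0), min(stop - offset, n)
            out.append(slice(lo, hi) if lo < hi else None)
            offset += n
        return out

    print(split_slice(slice(3, 9), [5, 5]))  # [slice(3, 5), slice(0, 4)]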
@@ -261,11 +608,35 @@
         return apply_index_to_slices_changes(result, changes)
 
     @debug_indexing
-    def _get_slice(self, s):
+    def _get_slice(self, s: slice) -> NDArray[Any]:
+        """Retrieves a slice of data from the combined dataset.
+
+        Parameters
+        ----------
+        s : slice
+            Slice specifying the data to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Slice of data from the combined dataset.
+        """
         return np.stack([self[i] for i in range(*s.indices(self._len))])
 
     @debug_indexing
-    def __getitem__(self, n):
+    def __getitem__(self, n: FullIndex) -> NDArray[Any]:
+        """Retrieves data from the combined dataset based on the given index.
+
+        Parameters
+        ----------
+        n : Union[int, slice, Tuple[Union[int, slice], ...]]
+            Index specifying the data to retrieve.
+
+        Returns
+        -------
+        NDArray[Any]
+            Data from the combined dataset based on the index.
+        """
         if isinstance(n, tuple):
             return self._get_tuple(n)
 
@@ -275,9 +646,10 @@ class GivenAxis(Combined):
275
646
  return np.concatenate([d[n] for d in self.datasets], axis=self.axis - 1)
276
647
 
277
648
  @cached_property
278
- def missing(self):
649
+ def missing(self) -> Set[int]:
650
+ """Returns the missing data information of the combined dataset along the given axis."""
279
651
  offset = 0
280
- result = set()
652
+ result: Set[int] = set()
281
653
  for d in self.datasets:
282
654
  result.update(offset + m for m in d.missing)
283
655
  if self.axis == 0: # Advance if axis is time