anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents as they appear in that registry.
Files changed (155)
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. anemoi/datasets/create/functions/__init__.py +0 -66
  132. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  133. anemoi/datasets/create/functions/filters/empty.py +0 -17
  134. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  135. anemoi/datasets/create/functions/filters/rename.py +0 -79
  136. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  137. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  138. anemoi/datasets/create/functions/sources/empty.py +0 -15
  139. anemoi/datasets/create/functions/sources/grib.py +0 -150
  140. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  141. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  142. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  143. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  144. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  145. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  146. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  147. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  148. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  149. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  150. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  151. anemoi/datasets/utils/fields.py +0 -47
  152. anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
  153. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  154. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
  155. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
--- a/anemoi/datasets/create/__init__.py
+++ b/anemoi/datasets/create/__init__.py
@@ -15,10 +15,14 @@ import time
 import uuid
 import warnings
 from functools import cached_property
+from typing import Any
+from typing import Optional
+from typing import Union
 
 import cftime
 import numpy as np
 import tqdm
+import zarr
 from anemoi.utils.dates import as_datetime
 from anemoi.utils.dates import frequency_to_string
 from anemoi.utils.dates import frequency_to_timedelta
@@ -55,8 +59,19 @@ LOG = logging.getLogger(__name__)
 VERSION = "0.30"
 
 
-def json_tidy(o):
+def json_tidy(o: Any) -> Any:
+    """Convert various types to JSON serializable format.
 
+    Parameters
+    ----------
+    o : Any
+        The object to convert.
+
+    Returns
+    -------
+    Any
+        The JSON serializable object.
+    """
     if isinstance(o, datetime.datetime):
         return o.isoformat()
 
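The retyped `json_tidy` above is the fallback serializer that `update_metadata` (further down in this diff) hands to `json.dumps`. A minimal sketch of the calling convention, reproducing only the `datetime` branch visible in these hunks:

```python
import datetime
import json


def json_tidy(o):
    # Stand-in reproducing only the datetime branch shown above; the real
    # function in anemoi.datasets.create handles more types.
    if isinstance(o, datetime.datetime):
        return o.isoformat()
    raise TypeError(f"{repr(o)} is not JSON serializable {type(o)}")


# json.dumps calls the fallback for any object it cannot serialize natively.
print(json.dumps({"updated": datetime.datetime(2024, 1, 1)}, default=json_tidy))
# -> {"updated": "2024-01-01T00:00:00"}
```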
@@ -85,12 +100,24 @@ def json_tidy(o):
     raise TypeError(f"{repr(o)} is not JSON serializable {type(o)}")
 
 
-def build_statistics_dates(dates, start, end):
-    """Compute the start and end dates for the statistics, based on :
-    - The start and end dates in the config
-    - The default statistics dates convention
-
-    Then adapt according to the actual dates in the dataset.
+def build_statistics_dates(
+    dates: list[datetime.datetime], start: Optional[datetime.datetime], end: Optional[datetime.datetime]
+) -> tuple[str, str]:
+    """Compute the start and end dates for the statistics.
+
+    Parameters
+    ----------
+    dates : list of datetime.datetime
+        The list of dates.
+    start : Optional[datetime.datetime]
+        The start date.
+    end : Optional[datetime.datetime]
+        The end date.
+
+    Returns
+    -------
+    tuple of str
+        The start and end dates in ISO format.
     """
     # if not specified, use the default statistics dates
     default_start, default_end = default_statistics_dates(dates)
@@ -109,7 +136,19 @@ def build_statistics_dates(dates, start, end):
     return (start.isoformat(), end.isoformat())
 
 
-def _path_readable(path):
+def _path_readable(path: str) -> bool:
+    """Check if the path is readable.
+
+    Parameters
+    ----------
+    path : str
+        The path to check.
+
+    Returns
+    -------
+    bool
+        True if the path is readable, False otherwise.
+    """
    import zarr
 
     try:
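Per the new annotations, `build_statistics_dates` accepts the dataset dates plus optional overrides and returns a pair of ISO strings. A hedged sketch of that contract; `default_statistics_dates`, which supplies the fallback bounds when `start` or `end` is `None`, is not part of this diff:

```python
import datetime

# Illustrative bounds only; when start or end is None the real function
# falls back to default_statistics_dates(dates).
start = datetime.datetime(2020, 1, 1)
end = datetime.datetime(2020, 12, 31, 18)

# The visible return statement produces ISO-formatted strings:
result = (start.isoformat(), end.isoformat())
assert result == ("2020-01-01T00:00:00", "2020-12-31T18:00:00")
```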
@@ -120,14 +159,37 @@
 
 
 class Dataset:
-    def __init__(self, path):
+    """A class to represent a dataset."""
+
+    def __init__(self, path: str):
+        """Initialize a Dataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         self.path = path
 
         _, ext = os.path.splitext(self.path)
         if ext != ".zarr":
             raise ValueError(f"Unsupported extension={ext} for path={self.path}")
 
-    def add_dataset(self, mode="r+", **kwargs):
+    def add_dataset(self, mode: str = "r+", **kwargs: Any) -> zarr.Array:
+        """Add a dataset to the Zarr store.
+
+        Parameters
+        ----------
+        mode : str, optional
+            The mode to open the Zarr store.
+        **kwargs
+            Additional arguments for the dataset.
+
+        Returns
+        -------
+        zarr.Array
+            The added dataset.
+        """
         import zarr
 
         z = zarr.open(self.path, mode=mode)
@@ -135,7 +197,14 @@ class Dataset:
 
         return add_zarr_dataset(zarr_root=z, **kwargs)
 
-    def update_metadata(self, **kwargs):
+    def update_metadata(self, **kwargs: Any) -> None:
+        """Update the metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            The metadata to update.
+        """
         import zarr
 
         LOG.debug(f"Updating metadata {kwargs}")
@@ -148,16 +217,19 @@ class Dataset:
             z.attrs[k] = json.loads(json.dumps(v, default=json_tidy))
 
     @cached_property
-    def anemoi_dataset(self):
+    def anemoi_dataset(self) -> Any:
+        """Get the Anemoi dataset."""
         return open_dataset(self.path)
 
     @cached_property
-    def zarr_metadata(self):
+    def zarr_metadata(self) -> dict:
+        """Get the Zarr metadata."""
         import zarr
 
         return dict(zarr.open(self.path, mode="r").attrs)
 
-    def print_info(self):
+    def print_info(self) -> None:
+        """Print information about the dataset."""
         import zarr
 
         z = zarr.open(self.path, mode="r")
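The hunks above show that `Dataset.update_metadata` pushes every value through `json.loads(json.dumps(v, default=json_tidy))` before writing it to the Zarr attributes, so non-JSON types such as `datetime` are stored as strings. A hedged usage sketch; `example.zarr` is a placeholder for an existing store:

```python
import datetime

# Assumption: Dataset is importable from the module shown in this diff.
from anemoi.datasets.create import Dataset

ds = Dataset("example.zarr")  # raises ValueError for any extension other than .zarr
ds.update_metadata(statistics_start_date=datetime.datetime(2020, 1, 1))

# The attribute comes back as the JSON round-trip of the datetime:
print(ds.zarr_metadata["statistics_start_date"])  # "2020-01-01T00:00:00"
```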
@@ -166,13 +238,42 @@
         except Exception as e:
             LOG.info(e)
 
-    def get_zarr_chunks(self):
+    def get_zarr_chunks(self) -> tuple:
+        """Get the chunks of the Zarr dataset.
+
+        Returns
+        -------
+        tuple
+            The chunks of the Zarr dataset.
+        """
         import zarr
 
         z = zarr.open(self.path, mode="r")
         return z["data"].chunks
 
-    def check_name(self, resolution, dates, frequency, raise_exception=True, is_test=False):
+    def check_name(
+        self,
+        resolution: str,
+        dates: list[datetime.datetime],
+        frequency: datetime.timedelta,
+        raise_exception: bool = True,
+        is_test: bool = False,
+    ) -> None:
+        """Check the name of the dataset.
+
+        Parameters
+        ----------
+        resolution : str
+            The resolution of the dataset.
+        dates : list of datetime.datetime
+            The dates of the dataset.
+        frequency : datetime.timedelta
+            The frequency of the dataset.
+        raise_exception : bool, optional
+            Whether to raise an exception if the name is invalid.
+        is_test : bool, optional
+            Whether this is a test.
+        """
         basename, _ = os.path.splitext(os.path.basename(self.path))
         try:
             DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
@@ -182,8 +283,14 @@
             else:
                 LOG.warning(f"Dataset name error: {e}")
 
-    def get_main_config(self):
-        """Returns None if the config is not found."""
+    def get_main_config(self) -> Any:
+        """Get the main configuration of the dataset.
+
+        Returns
+        -------
+        Any
+            The main configuration.
+        """
         import zarr
 
         z = zarr.open(self.path, mode="r")
@@ -191,7 +298,16 @@
 
 
 class WritableDataset(Dataset):
-    def __init__(self, path):
+    """A class to represent a writable dataset."""
+
+    def __init__(self, path: str):
+        """Initialize a WritableDataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)
         self.path = path
 
@@ -200,14 +316,26 @@ class WritableDataset(Dataset):
         self.z = zarr.open(self.path, mode="r+")
 
     @cached_property
-    def data_array(self):
+    def data_array(self) -> Any:
+        """Get the data array of the dataset."""
         import zarr
 
         return zarr.open(self.path, mode="r+")["data"]
 
 
 class NewDataset(Dataset):
-    def __init__(self, path, overwrite=False):
+    """A class to represent a new dataset."""
+
+    def __init__(self, path: str, overwrite: bool = False):
+        """Initialize a NewDataset instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        overwrite : bool, optional
+            Whether to overwrite the existing dataset.
+        """
         super().__init__(path)
         self.path = path
 
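`WritableDataset.data_array` and `Dataset.get_zarr_chunks` are thin wrappers over `zarr.open`; the equivalent direct calls, sketched against a placeholder store:

```python
import zarr

# Equivalent to WritableDataset.data_array and Dataset.get_zarr_chunks above;
# "example.zarr" is a placeholder for an existing store with a "data" array.
data = zarr.open("example.zarr", mode="r+")["data"]
chunks = zarr.open("example.zarr", mode="r")["data"].chunks
print(data.shape, chunks)
```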
@@ -218,9 +346,20 @@ class NewDataset(Dataset):
 
 
 class Actor:  # TODO: rename to Creator
+    """A base class for dataset creation actors."""
+
     dataset_class = WritableDataset
 
-    def __init__(self, path, cache=None):
+    def __init__(self, path: str, cache: Optional[str] = None):
+        """Initialize an Actor instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         # Catch all floating point errors, including overflow, sqrt(<0), etc
         np.seterr(all="raise", under="warn")
 
@@ -228,23 +367,52 @@ class Actor:  # TODO: rename to Creator
         self.cache = cache
         self.dataset = self.dataset_class(self.path)
 
-    def run(self):
+    def run(self) -> None:
+        """Run the actor."""
         # to be implemented in the sub-classes
         raise NotImplementedError()
 
-    def update_metadata(self, **kwargs):
+    def update_metadata(self, **kwargs: Any) -> None:
+        """Update the metadata of the dataset.
+
+        Parameters
+        ----------
+        **kwargs
+            The metadata to update.
+        """
         self.dataset.update_metadata(**kwargs)
 
-    def _cache_context(self):
+    def _cache_context(self) -> Any:
+        """Get the cache context.
+
+        Returns
+        -------
+        Any
+            The cache context.
+        """
         from .utils import cache_context
 
         return cache_context(self.cache)
 
-    def check_unkown_kwargs(self, kwargs):
+    def check_unkown_kwargs(self, kwargs: dict) -> None:
+        """Check for unknown keyword arguments.
+
+        Parameters
+        ----------
+        kwargs : dict
+            The keyword arguments.
+        """
         # remove this latter
         LOG.warning(f"💬 Unknown kwargs for {self.__class__.__name__}: {kwargs}")
 
-    def read_dataset_metadata(self, path):
+    def read_dataset_metadata(self, path: str) -> None:
+        """Read the metadata of the dataset.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         ds = open_dataset(path)
         self.dataset_shape = ds.shape
         self.variables_names = ds.variables
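`Actor` (to be renamed `Creator`, per the TODO) is the template for every step that follows: the constructor wraps the path in `dataset_class` and enables strict floating-point errors, and subclasses override `run`. A minimal sketch of the pattern, assuming the class is importable from this module:

```python
from anemoi.datasets.create import Actor  # assumption: importable from this module


class Touch(Actor):
    """Hypothetical actor that only stamps the dataset metadata."""

    def run(self) -> None:
        # Actor.update_metadata delegates to self.dataset.update_metadata.
        self.update_metadata(touched=True)


# The default dataset_class (WritableDataset) opens the store in "r+" mode,
# so the placeholder path must point at an existing .zarr store.
Touch("example.zarr").run()
```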
@@ -253,7 +421,19 @@ class Actor:  # TODO: rename to Creator
 
         self.missing_dates = sorted(list([self.dates[i] for i in ds.missing]))
 
-        def check_missing_dates(expected):
+        def check_missing_dates(expected: list[np.datetime64]) -> None:
+            """Check if the missing dates in the dataset match the expected dates.
+
+            Parameters
+            ----------
+            expected : list of np.datetime64
+                The expected missing dates.
+
+            Raises
+            ------
+            ValueError
+                If the missing dates in the dataset do not match the expected dates.
+            """
             import zarr
 
             z = zarr.open(path, "r")
@@ -269,21 +449,43 @@
 
 
 class Patch(Actor):
-    def __init__(self, path, options=None, **kwargs):
+    """A class to apply patches to a dataset."""
+
+    def __init__(self, path: str, options: dict = None, **kwargs: Any):
+        """Initialize a Patch instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        options : dict, optional
+            The patch options.
+        """
         self.path = path
         self.options = options or {}
 
-    def run(self):
+    def run(self) -> None:
+        """Run the patch."""
         from .patch import apply_patch
 
         apply_patch(self.path, **self.options)
 
 
 class Size(Actor):
-    def __init__(self, path, **kwargs):
+    """A class to compute the size of a dataset."""
+
+    def __init__(self, path: str, **kwargs: Any):
+        """Initialize a Size instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)
 
-    def run(self):
+    def run(self) -> None:
+        """Run the size computation."""
         from .size import compute_directory_sizes
 
         metadata = compute_directory_sizes(self.path)
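`Patch.run` simply forwards its options to `apply_patch`, and `Size.run` records directory sizes via `compute_directory_sizes`. A hedged one-liner for each; the empty options dict and the path are placeholders:

```python
# Assumption: both actors are importable from this module.
from anemoi.datasets.create import Patch, Size

Patch("example.zarr", options={}).run()  # forwards to apply_patch("example.zarr")
Size("example.zarr").run()               # computes and stores directory sizes
```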
@@ -301,23 +503,37 @@ class Size(Actor):
 
 
 class HasRegistryMixin:
+    """A mixin class to provide registry functionality."""
+
     @cached_property
-    def registry(self):
+    def registry(self) -> Any:
+        """Get the registry."""
         from .zarr import ZarrBuiltRegistry
 
         return ZarrBuiltRegistry(self.path, use_threads=self.use_threads)
 
 
 class HasStatisticTempMixin:
+    """A mixin class to provide temporary statistics functionality."""
+
     @cached_property
-    def tmp_statistics(self):
+    def tmp_statistics(self) -> TmpStatistics:
+        """Get the temporary statistics."""
         directory = self.statistics_temp_dir or os.path.join(self.path + ".storage_for_statistics.tmp")
         return TmpStatistics(directory)
 
 
 class HasElementForDataMixin:
-    def create_elements(self, config):
+    """A mixin class to provide element creation functionality for data."""
+
+    def create_elements(self, config: Any) -> None:
+        """Create elements for the dataset.
 
+        Parameters
+        ----------
+        config : Any
+            The configuration.
+        """
         assert self.registry
         assert self.tmp_statistics
 
@@ -329,11 +545,24 @@ class HasElementForDataMixin:
         self.output = build_output(config.output, parent=self)
 
         self.input = build_input_(main_config=config, output_config=self.output)
-        LOG.info("%s", self.input)
+        # LOG.info("%s", self.input)
 
 
-def build_input_(main_config, output_config):
+def build_input_(main_config: Any, output_config: Any) -> Any:
+    """Build the input for the dataset.
 
+    Parameters
+    ----------
+    main_config : Any
+        The main configuration.
+    output_config : Any
+        The output configuration.
+
+    Returns
+    -------
+    Any
+        The input builder.
+    """
     builder = build_input(
         main_config.input,
         data_sources=main_config.get("data_sources", {}),
@@ -348,21 +577,46 @@ def build_input_(main_config, output_config):
 
 
 class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
+    """A class to initialize a new dataset."""
+
     dataset_class = NewDataset
 
     def __init__(
         self,
-        path,
-        config,
-        check_name=False,
-        overwrite=False,
-        use_threads=False,
-        statistics_temp_dir=None,
-        progress=None,
-        test=False,
-        cache=None,
-        **kwargs,
+        path: str,
+        config: dict,
+        check_name: bool = False,
+        overwrite: bool = False,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        test: bool = False,
+        cache: Optional[str] = None,
+        **kwargs: Any,
     ):
+        """Initialize an Init instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        config : dict
+            The configuration.
+        check_name : bool, optional
+            Whether to check the dataset name.
+        overwrite : bool, optional
+            Whether to overwrite the existing dataset.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        test : bool, optional
+            Whether this is a test.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         if _path_readable(path) and not overwrite:
             raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
 
@@ -390,12 +644,26 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         LOG.info(f"Minimal input for 'init' step (using only the first date) : {one_date}")
         LOG.info(self.minimal_input)
 
-    def run(self):
+    def run(self) -> int:
+        """Run the initialization.
+
+        Returns
+        -------
+        int
+            The number of groups to process.
+        """
         with self._cache_context():
             return self._run()
 
-    def _run(self):
-        """Create an empty dataset of the right final shape
+    def _run(self) -> int:
+        """Internal method to run the initialization.
+
+        Returns
+        -------
+        int
+            The number of groups to process.
+        """
+        """Create an empty dataset of the right final shape.
 
         Read a small part of the data to get the shape of the data and the resolution and more metadata.
         """
@@ -547,9 +815,35 @@
 
 
 class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
+    """A class to load data into a dataset."""
+
     def __init__(
-        self, path, parts=None, use_threads=False, statistics_temp_dir=None, progress=None, cache=None, **kwargs
+        self,
+        path: str,
+        parts: Optional[str] = None,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        cache: Optional[str] = None,
+        **kwargs: Any,
     ):
+        """Initialize a Load instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        parts : Optional[str], optional
+            The parts to load.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        cache : Optional[str], optional
+            The cache directory.
+        """
         super().__init__(path, cache=cache)
         self.use_threads = use_threads
         self.statistics_temp_dir = statistics_temp_dir
@@ -567,11 +861,13 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         self.data_array = self.dataset.data_array
         self.n_groups = len(self.groups)
 
-    def run(self):
+    def run(self) -> None:
+        """Run the data loading."""
         with self._cache_context():
             self._run()
 
-    def _run(self):
+    def _run(self) -> None:
+        """Internal method to run the data loading."""
         for igroup, group in enumerate(self.groups):
             if not self.chunk_filter(igroup):
                 continue
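`Init` then `Load` create and fill the store: `Init.run` now returns the number of date groups, and `Load` accepts a `parts` string so loading can be split across jobs (the `i/n` form is consistent with the strings parsed by the `position` helper later in this file). A hedged sketch of the sequence; the empty config stands in for a real recipe, whose schema is outside this diff:

```python
# Assumption: both actors are importable from this module.
from anemoi.datasets.create import Init, Load

config = {}  # placeholder for a real recipe dict

n_groups = Init("example.zarr", config, overwrite=True).run()  # returns the group count
Load("example.zarr", parts="1/2").run()  # load the first half of the groups
Load("example.zarr", parts="2/2").run()  # load the second half
```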
@@ -595,7 +891,14 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
 
         self.dataset.print_info()
 
-    def load_result(self, result):
+    def load_result(self, result: Any) -> None:
+        """Load the result into the dataset.
+
+        Parameters
+        ----------
+        result : Any
+            The result to load.
+        """
         # There is one cube to load for each result.
         dates = list(result.group_of_dates)
 
@@ -656,14 +959,30 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         array.flush()
         LOG.info("Flushed data array")
 
-    def _get_allow_nans(self):
+    def _get_allow_nans(self) -> Union[bool, list]:
+        """Get the allow_nans configuration.
+
+        Returns
+        -------
+        bool | list
+            The allow_nans configuration.
+        """
         config = self.main_config
         if "allow_nans" in config.build:
             return config.build.allow_nans
 
         return config.statistics.get("allow_nans", [])
 
-    def load_cube(self, cube, array):
+    def load_cube(self, cube: Any, array: ViewCacheArray) -> None:
+        """Load the cube into the array.
+
+        Parameters
+        ----------
+        cube : Any
+            The cube to load.
+        array : ViewCacheArray
+            The array to load into.
+        """
         # There are several cubelets for each cube
         start = time.time()
         load = 0
@@ -673,7 +992,7 @@ class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
         total = cube.count(reading_chunks)
         LOG.debug(f"Loading datacube: {cube}")
 
-        def position(x):
+        def position(x: Any) -> Optional[int]:
             if isinstance(x, str) and "/" in x:
                 x = x.split("/")
                 return int(x[0])
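`position` extracts the leading index from an `i/n` part specifier. A worked check of the visible branch; the fall-through for inputs without a `/` is elided in the diff, so this copy returns `None` there by assumption:

```python
from typing import Any, Optional


def position(x: Any) -> Optional[int]:
    # Copy of the helper above, for illustration only.
    if isinstance(x, str) and "/" in x:
        x = x.split("/")
        return int(x[0])
    return None  # assumed fall-through; the original's behaviour here is not shown


assert position("2/5") == 2
assert position(7) is None
```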
@@ -715,7 +1034,29 @@
 
 
 class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
-    def __init__(self, path, statistics_temp_dir=None, delta=[], use_threads=False, **kwargs):
+    """A class to clean up temporary data and registry entries."""
+
+    def __init__(
+        self,
+        path: str,
+        statistics_temp_dir: Optional[str] = None,
+        delta: list = [],
+        use_threads: bool = False,
+        **kwargs: Any,
+    ):
+        """Initialize a Cleanup instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        delta : list, optional
+            The delta values.
+        use_threads : bool, optional
+            Whether to use threads.
+        """
         super().__init__(path)
         self.use_threads = use_threads
         self.statistics_temp_dir = statistics_temp_dir
@@ -725,7 +1066,8 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
             for d in delta
         ]
 
-    def run(self):
+    def run(self) -> None:
+        """Run the cleanup."""
         self.tmp_statistics.delete()
         self.registry.clean()
         for actor in self.actors:
@@ -733,16 +1075,35 @@ class Cleanup(Actor, HasRegistryMixin, HasStatisticTempMixin):
             actor.cleanup()
 
 class Verify(Actor):
-    def __init__(self, path, **kwargs):
+    """A class to verify the integrity of a dataset."""
+
+    def __init__(self, path: str, **kwargs: Any):
+        """Initialize a Verify instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        """
         super().__init__(path)
 
-    def run(self):
+    def run(self) -> None:
+        """Run the verification."""
         LOG.info(f"Verifying dataset at {self.path}")
         LOG.info(str(self.dataset.anemoi_dataset))
 
 
 class AdditionsMixin:
-    def skip(self):
+    """A mixin class to handle dataset additions."""
+
+    def skip(self) -> bool:
+        """Check if the additions should be skipped.
+
+        Returns
+        -------
+        bool
+            Whether to skip the additions.
+        """
         frequency = frequency_to_timedelta(self.dataset.anemoi_dataset.frequency)
         if not self.delta.total_seconds() % frequency.total_seconds() == 0:
             LOG.debug(f"Delta {self.delta} is not a multiple of frequency {frequency}. Skipping.")
@@ -755,13 +1116,15 @@ class AdditionsMixin:
         return False
 
     @cached_property
-    def tmp_storage_path(self):
+    def tmp_storage_path(self) -> str:
+        """Get the path to the temporary storage."""
         name = "storage_for_additions"
         if self.delta:
             name += frequency_to_string(self.delta)
         return os.path.join(f"{self.path}.{name}.tmp")
 
-    def read_from_dataset(self):
+    def read_from_dataset(self) -> None:
+        """Read data from the dataset."""
         self.variables = self.dataset.anemoi_dataset.variables
         self.frequency = frequency_to_timedelta(self.dataset.anemoi_dataset.frequency)
         start = self.dataset.zarr_metadata["statistics_start_date"]
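`tmp_storage_path` derives a sibling directory name from the dataset path and the delta. A worked example of the string arithmetic, assuming `frequency_to_string` renders a 12-hour timedelta as `"12h"`:

```python
import datetime
import os

from anemoi.utils.dates import frequency_to_string

path = "example.zarr"  # placeholder
delta = datetime.timedelta(hours=12)

name = "storage_for_additions"
if delta:
    name += frequency_to_string(delta)  # assumed to yield "12h"

print(os.path.join(f"{path}.{name}.tmp"))
# -> example.zarr.storage_for_additions12h.tmp
```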
@@ -780,11 +1143,34 @@
 
 
 class DeltaDataset:
-    def __init__(self, ds, idelta):
+    """A class to represent a dataset with delta values."""
+
+    def __init__(self, ds: Any, idelta: int):
+        """Initialize a DeltaDataset instance.
+
+        Parameters
+        ----------
+        ds : Any
+            The dataset.
+        idelta : int
+            The delta value.
+        """
         self.ds = ds
         self.idelta = idelta
 
-    def __getitem__(self, i):
+    def __getitem__(self, i: int) -> Any:
+        """Get an item from the dataset.
+
+        Parameters
+        ----------
+        i : int
+            The index.
+
+        Returns
+        -------
+        Any
+            The item.
+        """
         j = i - self.idelta
         if j < 0:
             raise MissingDateError(f"Missing date {j}")
@@ -792,13 +1178,29 @@
 
 
 class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, use_threads=False, progress=None, **kwargs):
+    """A class to initialize dataset additions."""
+
+    def __init__(self, path: str, delta: str, use_threads: bool = False, progress: Any = None, **kwargs: Any):
+        """Initialize an _InitAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
         self.progress = progress
 
-    def run(self):
+    def run(self) -> None:
+        """Run the additions initialization."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}")
             return
@@ -808,14 +1210,40 @@ class _InitAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage.create()
         LOG.info(f"Dataset {self.tmp_storage_path} additions initialized.")
 
-    def cleanup(self):
+    def cleanup(self) -> None:
+        """Clean up the temporary storage."""
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         self.tmp_storage.delete()
         LOG.info(f"Cleaned temporary storage {self.tmp_storage_path}")
 
 
 class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, parts=None, use_threads=False, progress=None, **kwargs):
+    """A class to run dataset additions."""
+
+    def __init__(
+        self,
+        path: str,
+        delta: str,
+        parts: Optional[str] = None,
+        use_threads: bool = False,
+        progress: Any = None,
+        **kwargs: Any,
+    ):
+        """Initialize a _RunAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        parts : Optional[str], optional
+            The parts to load.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
@@ -825,7 +1253,8 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         LOG.info(f"Writing in {self.tmp_storage_path}")
 
-    def run(self):
+    def run(self) -> None:
+        """Run the additions."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}")
             return
@@ -846,7 +1275,14 @@ class _RunAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage.flush()
         LOG.debug(f"Dataset {self.path} additions run.")
 
-    def allow_nans(self):
+    def allow_nans(self) -> bool:
+        """Check if NaNs are allowed.
+
+        Returns
+        -------
+        bool
+            Whether NaNs are allowed.
+        """
         if self.dataset.anemoi_dataset.metadata.get("allow_nans", False):
             return True
 
@@ -858,7 +1294,22 @@
 
 
 class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
-    def __init__(self, path, delta, use_threads=False, progress=None, **kwargs):
+    """A class to finalize dataset additions."""
+
+    def __init__(self, path: str, delta: str, use_threads: bool = False, progress: Any = None, **kwargs: Any):
+        """Initialize a _FinaliseAdditions instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        delta : str
+            The delta value.
+        use_threads : bool, optional
+            Whether to use threads.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.delta = frequency_to_timedelta(delta)
         self.use_threads = use_threads
@@ -867,7 +1318,8 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self.tmp_storage = build_storage(directory=self.tmp_storage_path, create=False)
         LOG.info(f"Reading from {self.tmp_storage_path}.")
 
-    def run(self):
+    def run(self) -> None:
+        """Run the additions finalization."""
         if self.skip():
             LOG.info(f"Skipping delta={self.delta}.")
             return
@@ -969,7 +1421,14 @@ class _FinaliseAdditions(Actor, HasRegistryMixin, AdditionsMixin):
         self._write(self.summary)
         self.tmp_storage.delete()
 
-    def _write(self, summary):
+    def _write(self, summary: Summary) -> None:
+        """Write the summary to the dataset.
+
+        Parameters
+        ----------
+        summary : Summary
+            The summary to write.
+        """
         for k in ["mean", "stdev", "minimum", "maximum", "sums", "squares", "count", "has_nans"]:
             name = f"statistics_tendencies_{frequency_to_string(self.delta)}_{k}"
             self.dataset.add_dataset(name=name, array=summary[k], dimensions=("variable",))
@@ -977,9 +1436,22 @@
 
         LOG.debug(f"Wrote additions in {self.path}")
 
-def multi_addition(cls):
+def multi_addition(cls: type) -> type:
+    """Create a class to handle multiple additions.
+
+    Parameters
+    ----------
+    cls : type
+        The class to handle additions.
+
+    Returns
+    -------
+    type
+        The class to handle multiple additions.
+    """
+
     class MultiAdditions:
-        def __init__(self, *args, **kwargs):
+        def __init__(self, *args, **kwargs: Any):
             self.actors = []
 
             for k in kwargs.pop("delta", []):
@@ -988,7 +1460,8 @@ def multi_addition(cls):
             if not self.actors:
                 LOG.warning("No delta found in kwargs, no additions will be computed.")
 
-        def run(self):
+        def run(self) -> None:
+            """Run the additions."""
             for actor in self.actors:
                 actor.run()
 
@@ -1001,13 +1474,36 @@
 FinaliseAdditions = multi_addition(_FinaliseAdditions)
 
 class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
-    def __init__(self, path, use_threads=False, statistics_temp_dir=None, progress=None, **kwargs):
+    """A class to compute statistics for a dataset."""
+
+    def __init__(
+        self,
+        path: str,
+        use_threads: bool = False,
+        statistics_temp_dir: Optional[str] = None,
+        progress: Any = None,
+        **kwargs: Any,
+    ):
+        """Initialize a Statistics instance.
+
+        Parameters
+        ----------
+        path : str
+            The path to the dataset.
+        use_threads : bool, optional
+            Whether to use threads.
+        statistics_temp_dir : Optional[str], optional
+            The directory for temporary statistics.
+        progress : Any, optional
+            The progress indicator.
+        """
         super().__init__(path)
         self.use_threads = use_threads
         self.progress = progress
         self.statistics_temp_dir = statistics_temp_dir
 
-    def run(self):
+    def run(self) -> None:
+        """Run the statistics computation."""
         start, end = (
             self.dataset.zarr_metadata["statistics_start_date"],
             self.dataset.zarr_metadata["statistics_end_date"],
@@ -1034,7 +1530,8 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         LOG.info(f"Wrote statistics in {self.path}")
 
     @cached_property
-    def allow_nans(self):
+    def allow_nans(self) -> Union[bool, list]:
+        """Check if NaNs are allowed."""
         import zarr
 
         z = zarr.open(self.path, mode="r")
@@ -1048,12 +1545,26 @@ class Statistics(Actor, HasStatisticTempMixin, HasRegistryMixin):
         return True
 
 
-def chain(tasks):
+def chain(tasks: list) -> type:
+    """Create a class to chain multiple tasks.
+
+    Parameters
+    ----------
+    tasks : list
+        The list of tasks to chain.
+
+    Returns
+    -------
+    type
+        The class to chain multiple tasks.
+    """
+
     class Chain(Actor):
-        def __init__(self, **kwargs):
+        def __init__(self, **kwargs: Any):
             self.kwargs = kwargs
 
-        def run(self):
+        def run(self) -> None:
+            """Run the chained tasks."""
             for cls in tasks:
                 t = cls(**self.kwargs)
                 t.run()
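`chain` builds a composite actor class whose `run` instantiates each task with the same keyword arguments and runs them in order. A hedged usage sketch with task classes defined earlier in this file:

```python
# Assumption: the names are importable from this module.
from anemoi.datasets.create import Cleanup, Statistics, chain

# Each class in the list is constructed as cls(**kwargs) and run in order.
StatisticsThenCleanup = chain([Statistics, Cleanup])
StatisticsThenCleanup(path="example.zarr", use_threads=False).run()  # placeholder path
```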
@@ -1061,7 +1572,23 @@ def chain(tasks):
     return Chain
 
 
-def creator_factory(name, trace=None, **kwargs):
+def creator_factory(name: str, trace: Optional[str] = None, **kwargs: Any) -> Any:
+    """Create a dataset creator.
+
+    Parameters
+    ----------
+    name : str
+        The name of the creator.
+    trace : Optional[str], optional
+        The trace file.
+    **kwargs
+        Additional arguments for the creator.
+
+    Returns
+    -------
+    Any
+        The dataset creator.
+    """
     if trace:
 
         enable_trace(trace)
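`creator_factory` is the entry point the CLI commands use; the name-to-class mapping is cut off at the end of this diff, so the sketch below leans only on the visible signature, and the `"init"` name and arguments are assumptions:

```python
from anemoi.datasets.create import creator_factory  # assumption: importable from here

# trace=None skips enable_trace(); the remaining kwargs reach the selected creator.
creator = creator_factory("init", path="example.zarr", config={}, overwrite=True)
creator.run()
```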