anemoi-datasets 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +10 -7
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +1 -1
  132. anemoi/datasets/create/functions/__init__.py +0 -66
  133. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  134. anemoi/datasets/create/functions/filters/empty.py +0 -17
  135. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  136. anemoi/datasets/create/functions/filters/rename.py +0 -79
  137. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  138. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  139. anemoi/datasets/create/functions/sources/empty.py +0 -15
  140. anemoi/datasets/create/functions/sources/grib.py +0 -150
  141. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  142. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  143. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  144. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  145. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  146. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  147. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  148. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  149. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  150. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  151. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  152. anemoi/datasets/utils/fields.py +0 -47
  153. anemoi_datasets-0.5.15.dist-info/RECORD +0 -129
  154. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  155. {anemoi_datasets-0.5.15.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@
9
9
 
10
10
  import logging
11
11
  import time
12
+ from typing import Any
12
13
 
13
14
  from anemoi.utils.humanize import seconds_to_human
14
15
 
@@ -25,8 +26,14 @@ class Init(Command):
25
26
  internal = True
26
27
  timestamp = True
27
28
 
28
- def add_arguments(self, subparser):
29
+ def add_arguments(self, subparser: Any) -> None:
30
+ """Add command-line arguments to the parser.
29
31
 
32
+ Parameters
33
+ ----------
34
+ subparser : Any
35
+ The argument parser to which the arguments will be added.
36
+ """
30
37
  subparser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
31
38
  subparser.add_argument("path", help="Path to store the created data.")
32
39
 
@@ -57,7 +64,14 @@ class Init(Command):
57
64
 
58
65
  subparser.add_argument("--trace", action="store_true")
59
66
 
60
- def run(self, args):
67
+ def run(self, args: Any) -> None:
68
+ """Execute the command with the provided arguments.
69
+
70
+ Parameters
71
+ ----------
72
+ args : Any
73
+ The arguments passed to the command.
74
+ """
61
75
  options = vars(args)
62
76
  options.pop("command")
63
77
  now = time.time()
@@ -13,6 +13,11 @@ import logging
13
13
  import os
14
14
  from copy import deepcopy
15
15
  from functools import cached_property
16
+ from typing import Any
17
+ from typing import Dict
18
+ from typing import List
19
+ from typing import Optional
20
+ from typing import Union
16
21
 
17
22
  import numpy as np
18
23
  import semantic_version
@@ -23,6 +28,7 @@ from anemoi.utils.humanize import when
23
28
  from anemoi.utils.text import dotted_line
24
29
  from anemoi.utils.text import progress
25
30
  from anemoi.utils.text import table
31
+ from numpy.typing import NDArray
26
32
 
27
33
  from anemoi.datasets import open_dataset
28
34
  from anemoi.datasets.data.stores import open_zarr
@@ -33,7 +39,20 @@ from . import Command
33
39
  LOG = logging.getLogger(__name__)
34
40
 
35
41
 
36
- def compute_directory_size(path):
42
+ def compute_directory_size(path: str) -> Union[tuple[int, int], tuple[None, None]]:
43
+ """Compute the total size and number of files in a directory.
44
+
45
+ Parameters
46
+ ----------
47
+ path : str
48
+ The path to the directory.
49
+
50
+ Returns
51
+ -------
52
+ tuple[int, int] | tuple[None, None]
53
+ The total size in bytes and the number of files,
54
+ or (None, None) if the path is not a directory.
55
+ """
37
56
  if not os.path.isdir(path):
38
57
  return None, None
39
58
  size = 0
@@ -46,18 +65,60 @@ def compute_directory_size(path):
46
65
  return size, n
47
66
 
48
67
 
49
- def local_time_bug(lon, date):
68
+ def local_time_bug(lon: float, date: datetime.datetime) -> float:
69
+ """Calculate the local time bug based on longitude and date.
70
+
71
+ Parameters
72
+ ----------
73
+ lon : float
74
+ Longitude.
75
+ date : datetime.datetime
76
+ Date and time.
77
+
78
+ Returns
79
+ -------
80
+ float
81
+ Local time bug in hours.
82
+ """
50
83
  delta = date - datetime.datetime(date.year, date.month, date.day)
51
84
  hours_since_midnight = delta.days + delta.seconds / 86400.0 # * 24 is missing
52
85
  return (lon / 360.0 * 24.0 + hours_since_midnight) % 24
53
86
 
54
87
 
55
- def cos_local_time_bug(lon, date):
88
+ def cos_local_time_bug(lon: float, date: datetime.datetime) -> float:
89
+ """Calculate the cosine of the local time bug.
90
+
91
+ Parameters
92
+ ----------
93
+ lon : float
94
+ Longitude.
95
+ date : datetime.datetime
96
+ Date and time.
97
+
98
+ Returns
99
+ -------
100
+ float
101
+ Cosine of the local time bug.
102
+ """
56
103
  radians = local_time_bug(lon, date) / 24 * np.pi * 2
57
104
  return np.cos(radians)
58
105
 
59
106
 
60
- def find(config, name):
107
+ def find(config: Union[dict, list], name: str) -> Any:
108
+ """Recursively search for a key in a nested dictionary or list.
109
+
110
+ Parameters
111
+ ----------
112
+ config : dict or list
113
+ The configuration to search.
114
+ name : str
115
+ The key to search for.
116
+
117
+ Returns
118
+ -------
119
+ Any
120
+ The value associated with the key, or None if not found.
121
+ """
61
122
  if isinstance(config, dict):
62
123
  if name in config:
63
124
  return config[name]
@@ -77,7 +138,22 @@ def find(config, name):
77
138
 
78
139
 
79
140
  class Version:
80
- def __init__(self, path, zarr, metadata, version):
141
+ """Represents a version of a dataset."""
142
+
143
+ def __init__(self, path: str, zarr: Any, metadata: dict, version: semantic_version.Version) -> None:
144
+ """Initialize the Version object.
145
+
146
+ Parameters
147
+ ----------
148
+ path : str
149
+ Path to the dataset.
150
+ zarr : Any
151
+ Zarr object.
152
+ metadata : dict
153
+ Metadata of the dataset.
154
+ version : semantic_version.Version
155
+ Version of the dataset.
156
+ """
81
157
  self.path = path
82
158
  self.zarr = zarr
83
159
  self.metadata = metadata
@@ -85,69 +161,91 @@ class Version:
85
161
  self.dataset = None
86
162
  self.dataset = open_dataset(self.path)
87
163
 
88
- def describe(self):
164
+ def describe(self) -> None:
165
+ """Print a description of the dataset."""
89
166
  print(f"📦 Path : {self.path}")
90
167
  print(f"🔢 Format version: {self.version}")
91
168
 
92
169
  @property
93
- def name_to_index(self):
170
+ def name_to_index(self) -> Dict[str, int]:
171
+ """Get a mapping of variable names to their indices."""
94
172
  return find(self.metadata, "name_to_index")
95
173
 
96
174
  @property
97
- def longitudes(self):
175
+ def longitudes(self) -> NDArray[Any]:
176
+ """Get the longitudes of the dataset."""
98
177
  try:
99
178
  return self.zarr.longitudes[:]
100
179
  except (KeyError, AttributeError):
101
180
  return self.zarr.longitude[:]
102
181
 
103
182
  @property
104
- def data(self):
183
+ def data(self) -> Any:
184
+ """Get the data of the dataset."""
105
185
  try:
106
186
  return self.zarr.data
107
187
  except AttributeError:
108
188
  return self.zarr
109
189
 
110
190
  @property
111
- def first_date(self):
191
+ def first_date(self) -> datetime.datetime:
192
+ """Get the first date of the dataset."""
112
193
  return datetime.datetime.fromisoformat(self.metadata["first_date"])
113
194
 
114
195
  @property
115
- def last_date(self):
196
+ def last_date(self) -> datetime.datetime:
197
+ """Get the last date of the dataset."""
116
198
  return datetime.datetime.fromisoformat(self.metadata["last_date"])
117
199
 
118
200
  @property
119
- def frequency(self):
201
+ def frequency(self) -> str:
202
+ """Get the frequency of the dataset."""
120
203
  return self.metadata["frequency"]
121
204
 
122
205
  @property
123
- def resolution(self):
206
+ def resolution(self) -> str:
207
+ """Get the resolution of the dataset."""
124
208
  return self.metadata["resolution"]
125
209
 
126
210
  @property
127
- def field_shape(self):
211
+ def field_shape(self) -> Optional[tuple]:
212
+ """Get the field shape of the dataset."""
128
213
  return self.metadata.get("field_shape")
129
214
 
130
215
  @property
131
- def proj_string(self):
216
+ def proj_string(self) -> Optional[str]:
217
+ """Get the projection string of the dataset."""
132
218
  return self.metadata.get("proj_string")
133
219
 
134
220
  @property
135
- def shape(self):
221
+ def shape(self) -> Optional[tuple]:
222
+ """Get the shape of the dataset."""
136
223
  if self.data and hasattr(self.data, "shape"):
137
224
  return self.data.shape
138
225
 
139
226
  @property
140
- def n_missing_dates(self):
227
+ def n_missing_dates(self) -> Optional[int]:
228
+ """Get the number of missing dates in the dataset."""
141
229
  if "missing_dates" in self.metadata:
142
230
  return len(self.metadata["missing_dates"])
143
231
  return None
144
232
 
145
233
  @property
146
- def uncompressed_data_size(self):
234
+ def uncompressed_data_size(self) -> Optional[int]:
235
+ """Get the uncompressed data size of the dataset."""
147
236
  if self.data and hasattr(self.data, "dtype") and hasattr(self.data, "size"):
148
237
  return self.data.dtype.itemsize * self.data.size
149
238
 
150
- def info(self, detailed, size):
239
+ def info(self, detailed: bool, size: bool) -> None:
240
+ """Print detailed information about the dataset.
241
+
242
+ Parameters
243
+ ----------
244
+ detailed : bool
245
+ Whether to print detailed information.
246
+ size : bool
247
+ Whether to print the size of the dataset.
248
+ """
151
249
  print()
152
250
  print(f'📅 Start : {self.first_date.strftime("%Y-%m-%d %H:%M")}')
153
251
  print(f'📅 End : {self.last_date.strftime("%Y-%m-%d %H:%M")}')
@@ -195,18 +293,28 @@ class Version:
195
293
  print()
196
294
 
197
295
  @property
198
- def variables(self):
296
+ def variables(self) -> List[str]:
297
+ """Get the list of variables in the dataset."""
199
298
  return [v[0] for v in sorted(self.name_to_index.items(), key=lambda x: x[1])]
200
299
 
201
300
  @property
202
- def total_size(self):
301
+ def total_size(self) -> Optional[int]:
302
+ """Get the total size of the dataset."""
203
303
  return self.zarr.attrs.get("total_size")
204
304
 
205
305
  @property
206
- def total_number_of_files(self):
306
+ def total_number_of_files(self) -> Optional[int]:
307
+ """Get the total number of files in the dataset."""
207
308
  return self.zarr.attrs.get("total_number_of_files")
208
309
 
209
- def print_sizes(self, size):
310
+ def print_sizes(self, size: bool) -> None:
311
+ """Print the size and number of files in the dataset.
312
+
313
+ Parameters
314
+ ----------
315
+ size : bool
316
+ Whether to compute and print the size.
317
+ """
210
318
  total_size = self.total_size
211
319
  n = self.total_number_of_files
212
320
 
@@ -222,7 +330,8 @@ class Version:
222
330
  print(f"📁 Files : {n:,}")
223
331
 
224
332
  @property
225
- def statistics(self):
333
+ def statistics(self) -> tuple[list, list, list, list]:
334
+ """Get the statistics of the dataset."""
226
335
  try:
227
336
  if self.dataset is not None:
228
337
  stats = self.dataset.statistics
@@ -231,31 +340,36 @@ class Version:
231
340
  return [["-"] * len(self.variables)] * 4
232
341
 
233
342
  @property
234
- def statistics_ready(self):
343
+ def statistics_ready(self) -> bool:
344
+ """Check if the statistics are ready."""
235
345
  for d in reversed(self.metadata.get("history", [])):
236
346
  if d["action"] == "compute_statistics_end":
237
347
  return True
238
348
  return False
239
349
 
240
350
  @property
241
- def statistics_started(self):
351
+ def statistics_started(self) -> Optional[datetime.datetime]:
352
+ """Get the timestamp when statistics computation started."""
242
353
  for d in reversed(self.metadata.get("history", [])):
243
354
  if d["action"] == "compute_statistics_start":
244
355
  return datetime.datetime.fromisoformat(d["timestamp"])
245
356
  return None
246
357
 
247
358
  @property
248
- def build_flags(self):
359
+ def build_flags(self) -> Optional[NDArray[Any]]:
360
+ """Get the build flags of the dataset."""
249
361
  return self.zarr.get("_build_flags")
250
362
 
251
363
  @cached_property
252
- def copy_flags(self):
364
+ def copy_flags(self) -> Optional[NDArray[Any]]:
365
+ """Get the copy flags of the dataset."""
253
366
  if "_copy" not in self.zarr:
254
367
  return None
255
368
  return self.zarr["_copy"][:]
256
369
 
257
370
  @property
258
- def copy_in_progress(self):
371
+ def copy_in_progress(self) -> bool:
372
+ """Check if a copy operation is in progress."""
259
373
  if "_copy" not in self.zarr:
260
374
  return False
261
375
 
@@ -267,10 +381,12 @@ class Version:
267
381
  return not all(self.copy_flags)
268
382
 
269
383
  @property
270
- def build_lengths(self):
384
+ def build_lengths(self) -> Optional[NDArray]:
385
+ """Get the build lengths of the dataset."""
271
386
  return self.zarr.get("_build_lengths")
272
387
 
273
- def progress(self):
388
+ def progress(self) -> None:
389
+ """Print the progress of dataset initialization or copying."""
274
390
  if self.copy_in_progress:
275
391
  copy_flags = self.copy_flags
276
392
  print("🪫 Dataset not ready, copy in progress.")
@@ -329,7 +445,8 @@ class Version:
329
445
  else:
330
446
  print("⏳ Statistics not ready.")
331
447
 
332
- def brute_force_statistics(self):
448
+ def brute_force_statistics(self) -> None:
449
+ """Compute and print statistics for the dataset."""
333
450
  if self.dataset is None:
334
451
  return
335
452
  print("📊 Computing statistics...")
@@ -376,13 +493,17 @@ class Version:
376
493
 
377
494
 
378
495
  class NoVersion(Version):
496
+ """Represents a dataset with no version."""
497
+
379
498
  @property
380
- def first_date(self):
499
+ def first_date(self) -> datetime.datetime:
500
+ """Get the first date of the dataset."""
381
501
  monthly = find(self.metadata, "monthly")
382
502
  return datetime.datetime.fromisoformat(monthly["start"])
383
503
 
384
504
  @property
385
- def last_date(self):
505
+ def last_date(self) -> datetime.datetime:
506
+ """Get the last date of the dataset."""
386
507
  monthly = find(self.metadata, "monthly")
387
508
  time = max([int(t) for t in find(self.metadata["earthkit-data"], "time")])
388
509
  assert isinstance(time, int), (time, type(time))
@@ -391,48 +512,67 @@ class NoVersion(Version):
391
512
  return datetime.datetime.fromisoformat(monthly["stop"]) + datetime.timedelta(hours=time)
392
513
 
393
514
  @property
394
- def frequency(self):
515
+ def frequency(self) -> int:
516
+ """Get the frequency of the dataset."""
395
517
  time = find(self.metadata["earthkit-data"], "time")
396
518
  return 24 // len(time)
397
519
 
398
520
  @property
399
- def statistics(self):
521
+ def statistics(self) -> tuple[list, list, list, list]:
522
+ """Get the statistics of the dataset."""
400
523
  stats = find(self.metadata, "statistics_by_index")
401
524
  return stats["minimum"], stats["maximum"], stats["mean"], stats["stdev"]
402
525
 
403
526
  @property
404
- def statistics_ready(self):
527
+ def statistics_ready(self) -> bool:
528
+ """Check if the statistics are ready."""
405
529
  return find(self.metadata, "statistics_by_index") is not None
406
530
 
407
531
  @property
408
- def resolution(self):
532
+ def resolution(self) -> str:
533
+ """Get the resolution of the dataset."""
409
534
  return find(self.metadata, "grid")
410
535
 
411
- def details(self):
536
+ def details(self) -> None:
537
+ """Print details of the dataset."""
412
538
  pass
413
539
 
414
- def progress(self):
540
+ def progress(self) -> None:
541
+ """Print the progress of dataset initialization or copying."""
415
542
  pass
416
543
 
417
- def ready(self):
544
+ def ready(self) -> bool:
545
+ """Check if the dataset is ready.
546
+
547
+ Returns
548
+ -------
549
+ bool
550
+ True if the dataset is ready, False otherwise.
551
+ """
418
552
  return True
419
553
 
420
554
 
421
555
  class Version0_4(Version):
422
- def details(self):
556
+ """Represents version 0.4 of a dataset."""
557
+
558
+ def details(self) -> None:
559
+ """Print details of the dataset."""
423
560
  pass
424
561
 
425
562
  @property
426
- def initialised(self):
563
+ def initialised(self) -> datetime.datetime:
564
+ """Get the initialization timestamp of the dataset."""
427
565
  return datetime.datetime.fromisoformat(self.metadata["creation_timestamp"])
428
566
 
429
- def statistics_ready(self):
567
+ def statistics_ready(self) -> bool:
568
+ """Check if the statistics are ready."""
430
569
  if not self.ready():
431
570
  return False
432
571
  build_flags = self.zarr["_build_flags"]
433
572
  return build_flags.attrs.get("_statistics_computed")
434
573
 
435
- def ready(self):
574
+ def ready(self) -> bool:
575
+ """Check if the dataset is ready."""
436
576
  if "_build_flags" not in self.zarr:
437
577
  return False
438
578
 
@@ -442,7 +582,20 @@ class Version0_4(Version):
442
582
 
443
583
  return all(build_flags)
444
584
 
445
- def _info(self, verbose, history, statistics, **kwargs):
585
+ def _info(self, verbose: bool, history: bool, statistics: bool, **kwargs: Any) -> None:
586
+ """Print information about the dataset.
587
+
588
+ Parameters
589
+ ----------
590
+ verbose : bool
591
+ Whether to print verbose information.
592
+ history : bool
593
+ Whether to print the history of the dataset.
594
+ statistics : bool
595
+ Whether to print statistics of the dataset.
596
+ **kwargs : Any
597
+ Additional keyword arguments.
598
+ """
446
599
  z = self.zarr
447
600
 
448
601
  # for backward compatibility
@@ -467,8 +620,11 @@ class Version0_4(Version):
467
620
 
468
621
 
469
622
  class Version0_6(Version):
623
+ """Represents version 0.6 of a dataset."""
624
+
470
625
  @property
471
- def initialised(self):
626
+ def initialised(self) -> Optional[datetime.datetime]:
627
+ """Get the initialization timestamp of the dataset."""
472
628
  for record in self.metadata.get("history", []):
473
629
  if record["action"] == "initialised":
474
630
  return datetime.datetime.fromisoformat(record["timestamp"])
@@ -480,7 +636,8 @@ class Version0_6(Version):
480
636
 
481
637
  return None
482
638
 
483
- def details(self):
639
+ def details(self) -> None:
640
+ """Print details of the dataset."""
484
641
  print()
485
642
  for d in self.metadata.get("history", []):
486
643
  d = deepcopy(d)
@@ -493,7 +650,8 @@ class Version0_6(Version):
493
650
  print(f" {timestamp} : {action} ({versions}) {more}")
494
651
  print()
495
652
 
496
- def ready(self):
653
+ def ready(self) -> bool:
654
+ """Check if the dataset is ready."""
497
655
  if "_build_flags" not in self.zarr:
498
656
  return False
499
657
 
@@ -501,20 +659,26 @@ class Version0_6(Version):
501
659
  return all(build_flags)
502
660
 
503
661
  @property
504
- def name_to_index(self):
662
+ def name_to_index(self) -> Dict[str, int]:
663
+ """Get a mapping of variable names to their indices."""
505
664
  return {n: i for i, n in enumerate(self.metadata["variables"])}
506
665
 
507
666
  @property
508
- def variables(self):
667
+ def variables(self) -> List[str]:
668
+ """Get the list of variables in the dataset."""
509
669
  return self.metadata["variables"]
510
670
 
511
671
  @property
512
- def variables_metadata(self):
672
+ def variables_metadata(self) -> dict:
673
+ """Get the metadata for the variables."""
513
674
  return self.metadata.get("variables_metadata", {})
514
675
 
515
676
 
516
677
  class Version0_12(Version0_6):
517
- def details(self):
678
+ """Represents version 0.12 of a dataset."""
679
+
680
+ def details(self) -> None:
681
+ """Print details of the dataset."""
518
682
  print()
519
683
  for d in self.metadata.get("history", []):
520
684
  d = deepcopy(d)
@@ -528,24 +692,30 @@ class Version0_12(Version0_6):
528
692
  print()
529
693
 
530
694
  @property
531
- def first_date(self):
695
+ def first_date(self) -> datetime.datetime:
696
+ """Get the first date of the dataset."""
532
697
  return datetime.datetime.fromisoformat(self.metadata["start_date"])
533
698
 
534
699
  @property
535
- def last_date(self):
700
+ def last_date(self) -> datetime.datetime:
701
+ """Get the last date of the dataset."""
536
702
  return datetime.datetime.fromisoformat(self.metadata["end_date"])
537
703
 
538
704
 
539
705
  class Version0_13(Version0_12):
706
+ """Represents version 0.13 of a dataset."""
707
+
540
708
  @property
541
- def build_flags(self):
709
+ def build_flags(self) -> Optional[NDArray]:
710
+ """Get the build flags for the dataset."""
542
711
  if "_build" not in self.zarr:
543
712
  return None
544
713
  build = self.zarr["_build"]
545
714
  return build.get("flags")
546
715
 
547
716
  @property
548
- def build_lengths(self):
717
+ def build_lengths(self) -> Optional[NDArray]:
718
+ """Get the build lengths for the dataset."""
549
719
  if "_build" not in self.zarr:
550
720
  return None
551
721
  build = self.zarr["_build"]
@@ -562,9 +732,16 @@ VERSIONS = {
562
732
 
563
733
 
564
734
  class InspectZarr(Command):
565
- """Inspect a zarr dataset."""
735
+ """Command to inspect a zarr dataset."""
736
+
737
+ def add_arguments(self, command_parser: Any) -> None:
738
+ """Add arguments to the command parser.
566
739
 
567
- def add_arguments(self, command_parser):
740
+ Parameters
741
+ ----------
742
+ command_parser : Any
743
+ The command parser.
744
+ """
568
745
  command_parser.add_argument("path", metavar="DATASET")
569
746
  command_parser.add_argument("--detailed", action="store_true")
570
747
 
@@ -572,10 +749,42 @@ class InspectZarr(Command):
572
749
  command_parser.add_argument("--statistics", action="store_true")
573
750
  command_parser.add_argument("--size", action="store_true", help="Print size")
574
751
 
575
- def run(self, args):
752
+ def run(self, args: Any) -> None:
753
+ """Run the command.
754
+
755
+ Parameters
756
+ ----------
757
+ args : Any
758
+ The command arguments.
759
+ """
576
760
  self.inspect_zarr(**vars(args))
577
761
 
578
- def inspect_zarr(self, path, progress=False, statistics=False, detailed=False, size=False, **kwargs):
762
+ def inspect_zarr(
763
+ self,
764
+ path: str,
765
+ progress: bool = False,
766
+ statistics: bool = False,
767
+ detailed: bool = False,
768
+ size: bool = False,
769
+ **kwargs: Any,
770
+ ) -> None:
771
+ """Inspect a zarr dataset.
772
+
773
+ Parameters
774
+ ----------
775
+ path : str
776
+ Path to the dataset.
777
+ progress : bool, optional
778
+ Whether to print progress, by default False.
779
+ statistics : bool, optional
780
+ Whether to compute and print statistics, by default False.
781
+ detailed : bool, optional
782
+ Whether to print detailed information, by default False.
783
+ size : bool, optional
784
+ Whether to print the size of the dataset, by default False.
785
+ **kwargs : Any
786
+ Additional keyword arguments.
787
+ """
579
788
  version = self._info(path)
580
789
 
581
790
  dotted_line()
@@ -596,7 +805,19 @@ class InspectZarr(Command):
596
805
  print(type(version))
597
806
  raise
598
807
 
599
- def _info(self, path):
808
+ def _info(self, path: str) -> Version:
809
+ """Get version information of the dataset.
810
+
811
+ Parameters
812
+ ----------
813
+ path : str
814
+ Path to the dataset.
815
+
816
+ Returns
817
+ -------
818
+ Version
819
+ The version object of the dataset.
820
+ """
600
821
  z = open_zarr(zarr_lookup(path))
601
822
 
602
823
  metadata = dict(z.attrs)