anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +552 -61
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/METADATA +9 -6
  129. anemoi_datasets-0.5.17.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/WHEEL +1 -1
  131. anemoi/datasets/create/functions/__init__.py +0 -66
  132. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  133. anemoi/datasets/create/functions/filters/empty.py +0 -17
  134. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  135. anemoi/datasets/create/functions/filters/rename.py +0 -79
  136. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  137. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  138. anemoi/datasets/create/functions/sources/empty.py +0 -15
  139. anemoi/datasets/create/functions/sources/grib.py +0 -150
  140. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  141. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  142. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  143. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  144. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  145. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  146. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  147. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  148. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  149. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  150. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  151. anemoi/datasets/utils/fields.py +0 -47
  152. anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
  153. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/entry_points.txt +0 -0
  154. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info/licenses}/LICENSE +0 -0
  155. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.17.dist-info}/top_level.txt +0 -0
@@ -8,18 +8,29 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
 
11
+ import datetime
11
12
  import logging
12
13
  import os
13
14
  import warnings
14
15
  from functools import cached_property
16
+ from typing import Any
17
+ from typing import Dict
18
+ from typing import List
19
+ from typing import Optional
20
+ from typing import Set
21
+ from typing import Union
15
22
  from urllib.parse import urlparse
16
23
 
17
24
  import numpy as np
18
25
  import zarr
19
26
  from anemoi.utils.dates import frequency_to_timedelta
27
+ from numpy.typing import NDArray
20
28
 
21
29
  from . import MissingDateError
22
30
  from .dataset import Dataset
31
+ from .dataset import FullIndex
32
+ from .dataset import Shape
33
+ from .dataset import TupleIndex
23
34
  from .debug import DEBUG_ZARR_LOADING
24
35
  from .debug import Node
25
36
  from .debug import Source
@@ -31,28 +42,34 @@ LOG = logging.getLogger(__name__)
31
42
 
32
43
 
33
44
  class ReadOnlyStore(zarr.storage.BaseStore):
34
- def __delitem__(self, key):
45
+ """A base class for read-only stores."""
46
+
47
+ def __delitem__(self, key: str) -> None:
48
+ """Prevent deletion of items."""
35
49
  raise NotImplementedError()
36
50
 
37
- def __setitem__(self, key, value):
51
+ def __setitem__(self, key: str, value: bytes) -> None:
52
+ """Prevent setting of items."""
38
53
  raise NotImplementedError()
39
54
 
40
- def __len__(self):
55
+ def __len__(self) -> int:
56
+ """Return the number of items in the store."""
41
57
  raise NotImplementedError()
42
58
 
43
- def __iter__(self):
59
+ def __iter__(self) -> iter:
60
+ """Return an iterator over the store."""
44
61
  raise NotImplementedError()
45
62
 
46
63
 
47
64
  class HTTPStore(ReadOnlyStore):
48
- """We write our own HTTPStore because the one used by zarr (s3fs)
49
- does not play well with fork() and multiprocessing.
50
- """
65
+ """A read-only store for HTTP(S) resources."""
51
66
 
52
- def __init__(self, url):
67
+ def __init__(self, url: str) -> None:
68
+ """Initialize the HTTPStore with a URL."""
53
69
  self.url = url
54
70
 
55
- def __getitem__(self, key):
71
+ def __getitem__(self, key: str) -> bytes:
72
+ """Retrieve an item from the store."""
56
73
  import requests
57
74
 
58
75
  r = requests.get(self.url + "/" + key)
@@ -65,18 +82,22 @@ class HTTPStore(ReadOnlyStore):
65
82
 
66
83
 
67
84
  class S3Store(ReadOnlyStore):
85
+ """A read-only store for S3 resources."""
86
+
68
87
  """We write our own S3Store because the one used by zarr (s3fs)
69
88
  does not play well with fork(). We also get to control the s3 client
70
89
  options using the anemoi configs.
71
90
  """
72
91
 
73
- def __init__(self, url, region=None):
92
+ def __init__(self, url: str, region: Optional[str] = None) -> None:
93
+ """Initialize the S3Store with a URL and optional region."""
74
94
  from anemoi.utils.remote.s3 import s3_client
75
95
 
76
96
  _, _, self.bucket, self.key = url.split("/", 3)
77
97
  self.s3 = s3_client(self.bucket, region=region)
78
98
 
79
- def __getitem__(self, key):
99
+ def __getitem__(self, key: str) -> bytes:
100
+ """Retrieve an item from the store."""
80
101
  try:
81
102
  response = self.s3.get_object(Bucket=self.bucket, Key=self.key + "/" + key)
82
103
  except self.s3.exceptions.NoSuchKey:
@@ -87,13 +108,19 @@ class S3Store(ReadOnlyStore):
87
108
 
88
109
  class PlanetaryComputerStore(ReadOnlyStore):
89
110
  """We write our own Store to access catalogs on Planetary Computer,
90
- as it requires some extra arguements to use xr.open_zarr.
111
+ as it requires some extra arguments to use xr.open_zarr.
91
112
  """
92
113
 
93
- def __init__(self, data_catalog_id):
114
+ def __init__(self, data_catalog_id: str) -> None:
115
+ """Initialize the PlanetaryComputerStore with a data catalog ID.
116
+
117
+ Parameters
118
+ ----------
119
+ data_catalog_id : str
120
+ The data catalog ID.
121
+ """
94
122
  self.data_catalog_id = data_catalog_id
95
123
 
96
- def __getitem__(self):
97
124
  import planetary_computer
98
125
  import pystac_client
99
126
 
@@ -117,34 +144,44 @@ class PlanetaryComputerStore(ReadOnlyStore):
117
144
  **asset.extra_fields["xarray:open_kwargs"],
118
145
  }
119
146
 
120
- return store
147
+ self.store = store
148
+
149
+ def __getitem__(self, key: str) -> bytes:
150
+ """Retrieve an item from the store."""
151
+ raise NotImplementedError()
121
152
 
122
153
 
123
154
  class DebugStore(ReadOnlyStore):
124
155
  """A store to debug the zarr loading."""
125
156
 
126
- def __init__(self, store):
157
+ def __init__(self, store: ReadOnlyStore) -> None:
158
+ """Initialize the DebugStore with another store."""
127
159
  assert not isinstance(store, DebugStore)
128
160
  self.store = store
129
161
 
130
- def __getitem__(self, key):
162
+ def __getitem__(self, key: str) -> bytes:
163
+ """Retrieve an item from the store and print debug information."""
131
164
  # print()
132
165
  print("GET", key, self)
133
166
  # traceback.print_stack(file=sys.stdout)
134
167
  return self.store[key]
135
168
 
136
- def __len__(self):
169
+ def __len__(self) -> int:
170
+ """Return the number of items in the store."""
137
171
  return len(self.store)
138
172
 
139
- def __iter__(self):
173
+ def __iter__(self) -> iter:
174
+ """Return an iterator over the store."""
140
175
  warnings.warn("DebugStore: iterating over the store")
141
176
  return iter(self.store)
142
177
 
143
- def __contains__(self, key):
178
+ def __contains__(self, key: str) -> bool:
179
+ """Check if the store contains a key."""
144
180
  return key in self.store
145
181
 
146
182
 
147
- def name_to_zarr_store(path_or_url):
183
+ def name_to_zarr_store(path_or_url: str) -> ReadOnlyStore:
184
+ """Convert a path or URL to a zarr store."""
148
185
  store = path_or_url
149
186
 
150
187
  if store.startswith("s3://"):
@@ -158,14 +195,15 @@ def name_to_zarr_store(path_or_url):
158
195
  store = S3Store(s3_url, region=bits[2])
159
196
  elif store.startswith("https://planetarycomputer.microsoft.com/"):
160
197
  data_catalog_id = store.rsplit("/", 1)[-1]
161
- store = PlanetaryComputerStore(data_catalog_id).__getitem__()
198
+ store = PlanetaryComputerStore(data_catalog_id).store
162
199
  else:
163
200
  store = HTTPStore(store)
164
201
 
165
202
  return store
166
203
 
167
204
 
168
- def open_zarr(path, dont_fail=False, cache=None):
205
+ def open_zarr(path: str, dont_fail: bool = False, cache: int = None) -> zarr.hierarchy.Group:
206
+ """Open a zarr store from a path."""
169
207
  try:
170
208
  store = name_to_zarr_store(path)
171
209
 
@@ -193,7 +231,8 @@ def open_zarr(path, dont_fail=False, cache=None):
193
231
  class Zarr(Dataset):
194
232
  """A zarr dataset."""
195
233
 
196
- def __init__(self, path):
234
+ def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
235
+ """Initialize the Zarr dataset with a path or zarr group."""
197
236
  if isinstance(path, zarr.hierarchy.Group):
198
237
  self.was_zarr = True
199
238
  self.path = str(id(path))
@@ -205,23 +244,32 @@ class Zarr(Dataset):
205
244
 
206
245
  # This seems to speed up the reading of the data a lot
207
246
  self.data = self.z.data
208
- self.missing = set()
247
+ self._missing = set()
248
+
249
+ @property
250
+ def missing(self) -> Set[int]:
251
+ """Return the missing dates of the dataset."""
252
+ return self._missing
209
253
 
210
254
  @classmethod
211
- def from_name(cls, name):
255
+ def from_name(cls, name: str) -> "Zarr":
256
+ """Create a Zarr dataset from a name."""
212
257
  if name.endswith(".zip") or name.endswith(".zarr"):
213
258
  return Zarr(name)
214
259
  return Zarr(zarr_lookup(name))
215
260
 
216
- def __len__(self):
261
+ def __len__(self) -> int:
262
+ """Return the length of the dataset."""
217
263
  return self.data.shape[0]
218
264
 
219
265
  @debug_indexing
220
266
  @expand_list_indexing
221
- def __getitem__(self, n):
267
+ def __getitem__(self, n: FullIndex) -> NDArray[Any]:
268
+ """Retrieve an item from the dataset."""
222
269
  return self.data[n]
223
270
 
224
- def _unwind(self, index, rest, shape, axis, axes):
271
+ def _unwind(self, index: Union[int, slice, list, tuple], rest: list, shape: tuple, axis: int, axes: list) -> iter:
272
+ """Unwind the index for multi-dimensional indexing."""
225
273
  if not isinstance(index, (int, slice, list, tuple)):
226
274
  try:
227
275
  # NumPy arrays, TensorFlow tensors, etc.
@@ -244,23 +292,28 @@ class Zarr(Dataset):
244
292
  yield (index,) + n
245
293
 
246
294
  @cached_property
247
- def chunks(self):
295
+ def chunks(self) -> TupleIndex:
296
+ """Return the chunks of the dataset."""
248
297
  return self.z.data.chunks
249
298
 
250
299
  @cached_property
251
- def shape(self):
300
+ def shape(self) -> Shape:
301
+ """Return the shape of the dataset."""
252
302
  return self.data.shape
253
303
 
254
304
  @cached_property
255
- def dtype(self):
305
+ def dtype(self) -> np.dtype:
306
+ """Return the data type of the dataset."""
256
307
  return self.z.data.dtype
257
308
 
258
309
  @cached_property
259
- def dates(self):
310
+ def dates(self) -> NDArray[np.datetime64]:
311
+ """Return the dates of the dataset."""
260
312
  return self.z.dates[:] # Convert to numpy
261
313
 
262
314
  @property
263
- def latitudes(self):
315
+ def latitudes(self) -> NDArray[Any]:
316
+ """Return the latitudes of the dataset."""
264
317
  try:
265
318
  return self.z.latitudes[:]
266
319
  except AttributeError:
@@ -268,7 +321,8 @@ class Zarr(Dataset):
268
321
  return self.z.latitude[:]
269
322
 
270
323
  @property
271
- def longitudes(self):
324
+ def longitudes(self) -> NDArray[Any]:
325
+ """Return the longitudes of the dataset."""
272
326
  try:
273
327
  return self.z.longitudes[:]
274
328
  except AttributeError:
@@ -276,7 +330,8 @@ class Zarr(Dataset):
276
330
  return self.z.longitude[:]
277
331
 
278
332
  @property
279
- def statistics(self):
333
+ def statistics(self) -> Dict[str, NDArray[Any]]:
334
+ """Return the statistics of the dataset."""
280
335
  return dict(
281
336
  mean=self.z.mean[:],
282
337
  stdev=self.z.stdev[:],
@@ -284,7 +339,8 @@ class Zarr(Dataset):
284
339
  minimum=self.z.minimum[:],
285
340
  )
286
341
 
287
- def statistics_tendencies(self, delta=None):
342
+ def statistics_tendencies(self, delta: Optional[datetime.timedelta] = None) -> Dict[str, NDArray[Any]]:
343
+ """Return the statistical tendencies of the dataset."""
288
344
  if delta is None:
289
345
  delta = self.frequency
290
346
  if isinstance(delta, int):
@@ -295,7 +351,7 @@ class Zarr(Dataset):
295
351
  delta = frequency_to_timedelta(delta)
296
352
  delta = frequency_to_string(delta)
297
353
 
298
- def func(k):
354
+ def func(k: str) -> str:
299
355
  return f"statistics_tendencies_{delta}_{k}"
300
356
 
301
357
  return dict(
@@ -306,11 +362,13 @@ class Zarr(Dataset):
306
362
  )
307
363
 
308
364
  @property
309
- def resolution(self):
365
+ def resolution(self) -> str:
366
+ """Return the resolution of the dataset."""
310
367
  return self.z.attrs["resolution"]
311
368
 
312
369
  @property
313
- def field_shape(self):
370
+ def field_shape(self) -> tuple:
371
+ """Return the field shape of the dataset."""
314
372
  try:
315
373
  return tuple(self.z.attrs["field_shape"])
316
374
  except KeyError:
@@ -318,7 +376,8 @@ class Zarr(Dataset):
318
376
  return (self.shape[-1],)
319
377
 
320
378
  @property
321
- def frequency(self):
379
+ def frequency(self) -> datetime.timedelta:
380
+ """Return the frequency of the dataset."""
322
381
  try:
323
382
  return frequency_to_timedelta(self.z.attrs["frequency"])
324
383
  except KeyError:
@@ -327,13 +386,15 @@ class Zarr(Dataset):
327
386
  return dates[1].astype(object) - dates[0].astype(object)
328
387
 
329
388
  @property
330
- def name_to_index(self):
389
+ def name_to_index(self) -> Dict[str, int]:
390
+ """Return the name to index mapping of the dataset."""
331
391
  if "variables" in self.z.attrs:
332
392
  return {n: i for i, n in enumerate(self.z.attrs["variables"])}
333
393
  return self.z.attrs["name_to_index"]
334
394
 
335
395
  @property
336
- def variables(self):
396
+ def variables(self) -> List[str]:
397
+ """Return the variables of the dataset."""
337
398
  return [
338
399
  k
339
400
  for k, v in sorted(
@@ -343,23 +404,28 @@ class Zarr(Dataset):
343
404
  ]
344
405
 
345
406
  @cached_property
346
- def constant_fields(self):
407
+ def constant_fields(self) -> List[str]:
408
+ """Return the constant fields of the dataset."""
347
409
  result = self.z.attrs.get("constant_fields")
348
410
  if result is None:
349
411
  LOG.warning("No 'constant_fields' attribute in %r, computing them", self)
350
412
  return self.computed_constant_fields()
351
413
 
352
414
  @property
353
- def variables_metadata(self):
415
+ def variables_metadata(self) -> Dict[str, Any]:
416
+ """Return the metadata of the variables."""
354
417
  return self.z.attrs.get("variables_metadata", {})
355
418
 
356
- def __repr__(self):
419
+ def __repr__(self) -> str:
420
+ """Return the string representation of the dataset."""
357
421
  return self.path
358
422
 
359
- def end_of_statistics_date(self):
423
+ def end_of_statistics_date(self) -> np.datetime64:
424
+ """Return the end date of the statistics."""
360
425
  return self.dates[-1]
361
426
 
362
- def metadata_specific(self):
427
+ def metadata_specific(self, **kwargs: Any) -> Dict[str, Any]:
428
+ """Return the specific metadata of the dataset."""
363
429
  return super().metadata_specific(
364
430
  attrs=dict(self.z.attrs),
365
431
  chunks=self.chunks,
@@ -367,46 +433,60 @@ class Zarr(Dataset):
367
433
  path=self.path,
368
434
  )
369
435
 
370
- def source(self, index):
436
+ def source(self, index: int) -> Source:
437
+ """Return the source of the dataset."""
371
438
  return Source(self, index, info=self.path)
372
439
 
373
- def mutate(self):
440
+ def mutate(self) -> Dataset:
441
+ """Mutate the dataset if it has missing dates."""
374
442
  if len(self.z.attrs.get("missing_dates", [])):
375
443
  LOG.warning(f"Dataset {self} has missing dates")
376
444
  return ZarrWithMissingDates(self.z if self.was_zarr else self.path)
377
445
  return self
378
446
 
379
- def tree(self):
447
+ def tree(self) -> Node:
448
+ """Return the tree representation of the dataset."""
380
449
  return Node(self, [], path=self.path)
381
450
 
382
- def get_dataset_names(self, names):
451
+ def get_dataset_names(self, names: Set[str]) -> None:
452
+ """Get the names of the datasets."""
383
453
  name, _ = os.path.splitext(os.path.basename(self.path))
384
454
  names.add(name)
385
455
 
386
- def collect_supporting_arrays(self, collected, *path):
456
+ def collect_supporting_arrays(self, collected: set, *path: str) -> None:
457
+ """Collect supporting arrays."""
387
458
  pass
388
459
 
389
- def collect_input_sources(self, collected):
460
+ def collect_input_sources(self, collected: set) -> None:
461
+ """Collect input sources."""
390
462
  pass
391
463
 
392
464
 
393
465
  class ZarrWithMissingDates(Zarr):
394
466
  """A zarr dataset with missing dates."""
395
467
 
396
- def __init__(self, path):
468
+ def __init__(self, path: Union[str, zarr.hierarchy.Group]) -> None:
469
+ """Initialize the ZarrWithMissingDates dataset with a path or zarr group."""
397
470
  super().__init__(path)
398
471
 
399
472
  missing_dates = self.z.attrs.get("missing_dates", [])
400
473
  missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
401
474
  self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
402
- self.missing = set(self.missing_to_dates)
475
+ self._missing = set(self.missing_to_dates)
476
+
477
+ @property
478
+ def missing(self) -> Set[int]:
479
+ """Return the missing dates of the dataset."""
480
+ return self._missing
403
481
 
404
- def mutate(self):
482
+ def mutate(self) -> Dataset:
483
+ """Mutate the dataset."""
405
484
  return self
406
485
 
407
486
  @debug_indexing
408
487
  @expand_list_indexing
409
- def __getitem__(self, n):
488
+ def __getitem__(self, n: FullIndex) -> NDArray[Any]:
489
+ """Retrieve an item from the dataset."""
410
490
  if isinstance(n, int):
411
491
  if n in self.missing:
412
492
  self._report_missing(n)
@@ -437,24 +517,29 @@ class ZarrWithMissingDates(Zarr):
437
517
  self._report_missing(list(common)[0])
438
518
  return self.data[n]
439
519
 
520
+ raise TypeError(f"Unsupported index {n} {type(n)}, {first} {type(first)}")
521
+
440
522
  raise TypeError(f"Unsupported index {n} {type(n)}")
441
523
 
442
- def _report_missing(self, n):
524
+ def _report_missing(self, n: int) -> None:
525
+ """Report a missing date."""
443
526
  raise MissingDateError(f"Date {self.missing_to_dates[n]} is missing (index={n})")
444
527
 
445
- def tree(self):
528
+ def tree(self) -> Node:
529
+ """Return the tree representation of the dataset."""
446
530
  return Node(self, [], path=self.path, missing=sorted(self.missing))
447
531
 
448
532
  @property
449
- def label(self):
533
+ def label(self) -> str:
534
+ """Return the label of the dataset."""
450
535
  return "zarr*"
451
536
 
452
537
 
453
538
  QUIET = set()
454
539
 
455
540
 
456
- def zarr_lookup(name, fail=True):
457
-
541
+ def zarr_lookup(name: str, fail: bool = True) -> Optional[str]:
542
+ """Look up a zarr dataset by name."""
458
543
  if name.endswith(".zarr") or name.endswith(".zip"):
459
544
  return name
460
545
 
@@ -464,7 +549,7 @@ def zarr_lookup(name, fail=True):
464
549
  if name not in QUIET:
465
550
  LOG.info("Opening `%s` as `%s`", name, config["named"][name])
466
551
  QUIET.add(name)
467
- return config["named"][name]
552
+ return str(config["named"][name])
468
553
 
469
554
  tried = []
470
555
  for location in config["path"]: