anemoi-datasets 0.5.16__py3-none-any.whl → 0.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. anemoi/datasets/__init__.py +4 -1
  2. anemoi/datasets/__main__.py +12 -2
  3. anemoi/datasets/_version.py +9 -4
  4. anemoi/datasets/commands/cleanup.py +17 -2
  5. anemoi/datasets/commands/compare.py +18 -2
  6. anemoi/datasets/commands/copy.py +196 -14
  7. anemoi/datasets/commands/create.py +50 -7
  8. anemoi/datasets/commands/finalise-additions.py +17 -2
  9. anemoi/datasets/commands/finalise.py +17 -2
  10. anemoi/datasets/commands/init-additions.py +17 -2
  11. anemoi/datasets/commands/init.py +16 -2
  12. anemoi/datasets/commands/inspect.py +283 -62
  13. anemoi/datasets/commands/load-additions.py +16 -2
  14. anemoi/datasets/commands/load.py +16 -2
  15. anemoi/datasets/commands/patch.py +17 -2
  16. anemoi/datasets/commands/publish.py +17 -2
  17. anemoi/datasets/commands/scan.py +31 -3
  18. anemoi/datasets/compute/recentre.py +47 -11
  19. anemoi/datasets/create/__init__.py +612 -85
  20. anemoi/datasets/create/check.py +142 -20
  21. anemoi/datasets/create/chunks.py +64 -4
  22. anemoi/datasets/create/config.py +185 -21
  23. anemoi/datasets/create/filter.py +50 -0
  24. anemoi/datasets/create/filters/__init__.py +33 -0
  25. anemoi/datasets/create/filters/empty.py +37 -0
  26. anemoi/datasets/create/filters/legacy.py +93 -0
  27. anemoi/datasets/create/filters/noop.py +37 -0
  28. anemoi/datasets/create/filters/orog_to_z.py +58 -0
  29. anemoi/datasets/create/{functions/filters → filters}/pressure_level_relative_humidity_to_specific_humidity.py +33 -10
  30. anemoi/datasets/create/{functions/filters → filters}/pressure_level_specific_humidity_to_relative_humidity.py +32 -8
  31. anemoi/datasets/create/filters/rename.py +205 -0
  32. anemoi/datasets/create/{functions/filters → filters}/rotate_winds.py +43 -28
  33. anemoi/datasets/create/{functions/filters → filters}/single_level_dewpoint_to_relative_humidity.py +32 -9
  34. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_dewpoint.py +33 -9
  35. anemoi/datasets/create/{functions/filters → filters}/single_level_relative_humidity_to_specific_humidity.py +55 -7
  36. anemoi/datasets/create/{functions/filters → filters}/single_level_specific_humidity_to_relative_humidity.py +98 -37
  37. anemoi/datasets/create/filters/speeddir_to_uv.py +95 -0
  38. anemoi/datasets/create/{functions/filters → filters}/sum.py +24 -27
  39. anemoi/datasets/create/filters/transform.py +53 -0
  40. anemoi/datasets/create/{functions/filters → filters}/unrotate_winds.py +27 -18
  41. anemoi/datasets/create/filters/uv_to_speeddir.py +94 -0
  42. anemoi/datasets/create/{functions/filters → filters}/wz_to_w.py +51 -33
  43. anemoi/datasets/create/input/__init__.py +76 -5
  44. anemoi/datasets/create/input/action.py +149 -13
  45. anemoi/datasets/create/input/concat.py +81 -10
  46. anemoi/datasets/create/input/context.py +39 -4
  47. anemoi/datasets/create/input/data_sources.py +72 -6
  48. anemoi/datasets/create/input/empty.py +21 -3
  49. anemoi/datasets/create/input/filter.py +60 -12
  50. anemoi/datasets/create/input/function.py +154 -37
  51. anemoi/datasets/create/input/join.py +86 -14
  52. anemoi/datasets/create/input/misc.py +67 -17
  53. anemoi/datasets/create/input/pipe.py +33 -6
  54. anemoi/datasets/create/input/repeated_dates.py +189 -41
  55. anemoi/datasets/create/input/result.py +202 -87
  56. anemoi/datasets/create/input/step.py +119 -22
  57. anemoi/datasets/create/input/template.py +100 -13
  58. anemoi/datasets/create/input/trace.py +62 -7
  59. anemoi/datasets/create/patch.py +52 -4
  60. anemoi/datasets/create/persistent.py +134 -17
  61. anemoi/datasets/create/size.py +15 -1
  62. anemoi/datasets/create/source.py +51 -0
  63. anemoi/datasets/create/sources/__init__.py +36 -0
  64. anemoi/datasets/create/{functions/sources → sources}/accumulations.py +296 -30
  65. anemoi/datasets/create/{functions/sources → sources}/constants.py +27 -2
  66. anemoi/datasets/create/{functions/sources → sources}/eccc_fstd.py +7 -3
  67. anemoi/datasets/create/sources/empty.py +37 -0
  68. anemoi/datasets/create/{functions/sources → sources}/forcings.py +25 -1
  69. anemoi/datasets/create/sources/grib.py +297 -0
  70. anemoi/datasets/create/{functions/sources → sources}/hindcasts.py +38 -4
  71. anemoi/datasets/create/sources/legacy.py +93 -0
  72. anemoi/datasets/create/{functions/sources → sources}/mars.py +168 -20
  73. anemoi/datasets/create/sources/netcdf.py +42 -0
  74. anemoi/datasets/create/sources/opendap.py +43 -0
  75. anemoi/datasets/create/{functions/sources/__init__.py → sources/patterns.py} +35 -4
  76. anemoi/datasets/create/sources/recentre.py +150 -0
  77. anemoi/datasets/create/{functions/sources → sources}/source.py +27 -5
  78. anemoi/datasets/create/{functions/sources → sources}/tendencies.py +64 -7
  79. anemoi/datasets/create/sources/xarray.py +92 -0
  80. anemoi/datasets/create/sources/xarray_kerchunk.py +36 -0
  81. anemoi/datasets/create/sources/xarray_support/README.md +1 -0
  82. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/__init__.py +109 -8
  83. anemoi/datasets/create/sources/xarray_support/coordinates.py +442 -0
  84. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/field.py +94 -16
  85. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/fieldlist.py +90 -25
  86. anemoi/datasets/create/sources/xarray_support/flavour.py +1036 -0
  87. anemoi/datasets/create/{functions/sources/xarray → sources/xarray_support}/grid.py +92 -31
  88. anemoi/datasets/create/sources/xarray_support/metadata.py +395 -0
  89. anemoi/datasets/create/sources/xarray_support/patch.py +91 -0
  90. anemoi/datasets/create/sources/xarray_support/time.py +391 -0
  91. anemoi/datasets/create/sources/xarray_support/variable.py +331 -0
  92. anemoi/datasets/create/sources/xarray_zarr.py +41 -0
  93. anemoi/datasets/create/{functions/sources → sources}/zenodo.py +34 -5
  94. anemoi/datasets/create/statistics/__init__.py +233 -44
  95. anemoi/datasets/create/statistics/summary.py +52 -6
  96. anemoi/datasets/create/testing.py +76 -0
  97. anemoi/datasets/create/{functions/filters/noop.py → typing.py} +6 -3
  98. anemoi/datasets/create/utils.py +97 -6
  99. anemoi/datasets/create/writer.py +26 -4
  100. anemoi/datasets/create/zarr.py +170 -23
  101. anemoi/datasets/data/__init__.py +51 -4
  102. anemoi/datasets/data/complement.py +191 -40
  103. anemoi/datasets/data/concat.py +141 -16
  104. anemoi/datasets/data/dataset.py +558 -62
  105. anemoi/datasets/data/debug.py +197 -26
  106. anemoi/datasets/data/ensemble.py +93 -8
  107. anemoi/datasets/data/fill_missing.py +165 -18
  108. anemoi/datasets/data/forwards.py +428 -56
  109. anemoi/datasets/data/grids.py +323 -97
  110. anemoi/datasets/data/indexing.py +112 -19
  111. anemoi/datasets/data/interpolate.py +92 -12
  112. anemoi/datasets/data/join.py +158 -19
  113. anemoi/datasets/data/masked.py +129 -15
  114. anemoi/datasets/data/merge.py +137 -23
  115. anemoi/datasets/data/misc.py +172 -16
  116. anemoi/datasets/data/missing.py +233 -29
  117. anemoi/datasets/data/rescale.py +111 -10
  118. anemoi/datasets/data/select.py +168 -26
  119. anemoi/datasets/data/statistics.py +67 -6
  120. anemoi/datasets/data/stores.py +149 -64
  121. anemoi/datasets/data/subset.py +159 -25
  122. anemoi/datasets/data/unchecked.py +168 -57
  123. anemoi/datasets/data/xy.py +168 -25
  124. anemoi/datasets/dates/__init__.py +191 -16
  125. anemoi/datasets/dates/groups.py +189 -47
  126. anemoi/datasets/grids.py +270 -31
  127. anemoi/datasets/testing.py +28 -1
  128. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/METADATA +9 -6
  129. anemoi_datasets-0.5.18.dist-info/RECORD +137 -0
  130. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/WHEEL +1 -1
  131. anemoi/datasets/create/functions/__init__.py +0 -66
  132. anemoi/datasets/create/functions/filters/__init__.py +0 -9
  133. anemoi/datasets/create/functions/filters/empty.py +0 -17
  134. anemoi/datasets/create/functions/filters/orog_to_z.py +0 -58
  135. anemoi/datasets/create/functions/filters/rename.py +0 -79
  136. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +0 -78
  137. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +0 -56
  138. anemoi/datasets/create/functions/sources/empty.py +0 -15
  139. anemoi/datasets/create/functions/sources/grib.py +0 -150
  140. anemoi/datasets/create/functions/sources/netcdf.py +0 -15
  141. anemoi/datasets/create/functions/sources/opendap.py +0 -15
  142. anemoi/datasets/create/functions/sources/recentre.py +0 -60
  143. anemoi/datasets/create/functions/sources/xarray/coordinates.py +0 -255
  144. anemoi/datasets/create/functions/sources/xarray/flavour.py +0 -472
  145. anemoi/datasets/create/functions/sources/xarray/metadata.py +0 -148
  146. anemoi/datasets/create/functions/sources/xarray/patch.py +0 -44
  147. anemoi/datasets/create/functions/sources/xarray/time.py +0 -177
  148. anemoi/datasets/create/functions/sources/xarray/variable.py +0 -188
  149. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +0 -42
  150. anemoi/datasets/create/functions/sources/xarray_zarr.py +0 -15
  151. anemoi/datasets/utils/fields.py +0 -47
  152. anemoi_datasets-0.5.16.dist-info/RECORD +0 -129
  153. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/entry_points.txt +0 -0
  154. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info/licenses}/LICENSE +0 -0
  155. {anemoi_datasets-0.5.16.dist-info → anemoi_datasets-0.5.18.dist-info}/top_level.txt +0 -0
@@ -12,12 +12,28 @@ import datetime
12
12
  import os
13
13
  import warnings
14
14
  from contextlib import contextmanager
15
+ from typing import Any
16
+ from typing import Union
15
17
 
16
18
  import numpy as np
17
19
  from earthkit.data import settings
20
+ from numpy.typing import NDArray
18
21
 
19
22
 
20
- def cache_context(dirname):
23
+ def cache_context(dirname: str) -> contextmanager:
24
+ """Context manager for setting a temporary cache directory.
25
+
26
+ Parameters
27
+ ----------
28
+ dirname : str
29
+ The directory name for the cache.
30
+
31
+ Returns
32
+ -------
33
+ contextmanager
34
+ A context manager that sets the cache directory.
35
+ """
36
+
21
37
  @contextmanager
22
38
  def no_cache_context():
23
39
  yield
@@ -30,7 +46,21 @@ def cache_context(dirname):
30
46
  return settings.temporary({"cache-policy": "user", "user-cache-directory": dirname})
31
47
 
32
48
 
33
- def to_datetime_list(*args, **kwargs):
49
+ def to_datetime_list(*args: Any, **kwargs: Any) -> list[datetime.datetime]:
50
+ """Convert various date formats to a list of datetime objects.
51
+
52
+ Parameters
53
+ ----------
54
+ *args : Any
55
+ Positional arguments for date conversion.
56
+ **kwargs : Any
57
+ Keyword arguments for date conversion.
58
+
59
+ Returns
60
+ -------
61
+ list[datetime.datetime]
62
+ A list of datetime objects.
63
+ """
34
64
  from earthkit.data.utils.dates import to_datetime_list as to_datetime_list_
35
65
 
36
66
  warnings.warn(
@@ -41,7 +71,21 @@ def to_datetime_list(*args, **kwargs):
41
71
  return to_datetime_list_(*args, **kwargs)
42
72
 
43
73
 
44
- def to_datetime(*args, **kwargs):
74
+ def to_datetime(*args: Any, **kwargs: Any) -> datetime.datetime:
75
+ """Convert various date formats to a single datetime object.
76
+
77
+ Parameters
78
+ ----------
79
+ *args : Any
80
+ Positional arguments for date conversion.
81
+ **kwargs : Any
82
+ Keyword arguments for date conversion.
83
+
84
+ Returns
85
+ -------
86
+ datetime.datetime
87
+ A datetime object.
88
+ """
45
89
  from earthkit.data.utils.dates import to_datetime as to_datetime_
46
90
 
47
91
  warnings.warn(
@@ -53,7 +97,24 @@ def to_datetime(*args, **kwargs):
53
97
  return to_datetime_(*args, **kwargs)
54
98
 
55
99
 
56
- def make_list_int(value):
100
+ def make_list_int(value: Union[str, list, tuple, int]) -> list[int]:
101
+ """Convert a string, list, tuple, or integer to a list of integers.
102
+
103
+ Parameters
104
+ ----------
105
+ value : str or list or tuple or int
106
+ The value to convert.
107
+
108
+ Returns
109
+ -------
110
+ list[int]
111
+ A list of integers.
112
+
113
+ Raises
114
+ ------
115
+ ValueError
116
+ If the value cannot be converted to a list of integers.
117
+ """
57
118
  # Convert a string like "1/2/3" or "1/to/3" or "1/to/10/by/2" to a list of integers.
58
119
  # Moved to anemoi.utils.humanize
59
120
  # replace with from anemoi.utils.humanize import make_list_int
@@ -78,8 +139,38 @@ def make_list_int(value):
78
139
  raise ValueError(f"Cannot make list from {value}")
79
140
 
80
141
 
81
- def normalize_and_check_dates(dates, start, end, frequency, dtype="datetime64[s]"):
82
-
142
+ def normalize_and_check_dates(
143
+ dates: list[datetime.datetime],
144
+ start: datetime.datetime,
145
+ end: datetime.datetime,
146
+ frequency: datetime.timedelta,
147
+ dtype: str = "datetime64[s]",
148
+ ) -> NDArray[Any]:
149
+ """Normalize and check a list of dates against a specified frequency.
150
+
151
+ Parameters
152
+ ----------
153
+ dates : list[datetime.datetime]
154
+ The list of dates to check.
155
+ start : datetime.datetime
156
+ The start date.
157
+ end : datetime.datetime
158
+ The end date.
159
+ frequency : datetime.timedelta
160
+ The frequency of the dates.
161
+ dtype : str, optional
162
+ The data type of the dates, by default "datetime64[s]".
163
+
164
+ Returns
165
+ -------
166
+ NDArray[Any]
167
+ An array of normalized dates.
168
+
169
+ Raises
170
+ ------
171
+ ValueError
172
+ If the final date size does not match the data shape.
173
+ """
83
174
  dates = [d.hdate if hasattr(d, "hdate") else d for d in dates]
84
175
 
85
176
  assert isinstance(frequency, datetime.timedelta), frequency
@@ -9,8 +9,10 @@
9
9
 
10
10
 
11
11
  import logging
12
+ from typing import Any
12
13
 
13
14
  import numpy as np
15
+ from numpy.typing import NDArray
14
16
 
15
17
  LOG = logging.getLogger(__name__)
16
18
 
@@ -23,20 +25,40 @@ class ViewCacheArray:
23
25
  temporarily store the data before flushing it to the array.
24
26
 
25
27
  The `flush` method copies the contents of the cache to the final array.
26
-
27
28
  """
28
29
 
29
- def __init__(self, array, *, shape, indexes):
30
+ def __init__(self, array: NDArray[Any], *, shape: tuple[int, ...], indexes: list[int]):
31
+ """Initialize the ViewCacheArray.
32
+
33
+ Parameters
34
+ ----------
35
+ array : NDArray[Any]
36
+ The NumPy-like array to store the final data.
37
+ shape : tuple[int, ...]
38
+ The shape of the cache array.
39
+ indexes : list[int]
40
+ List to reindex the first dimension.
41
+ """
30
42
  assert len(indexes) == shape[0], (len(indexes), shape[0])
31
43
  self.array = array
32
44
  self.dtype = array.dtype
33
45
  self.cache = np.full(shape, np.nan, dtype=self.dtype)
34
46
  self.indexes = indexes
35
47
 
36
- def __setitem__(self, key, value):
48
+ def __setitem__(self, key: tuple[int, ...], value: NDArray[Any]) -> None:
49
+ """Set the value in the cache array at the specified key.
50
+
51
+ Parameters
52
+ ----------
53
+ key : tuple[int, ...]
54
+ The index key to set the value.
55
+ value : NDArray[Any]
56
+ The value to set in the cache array.
57
+ """
37
58
  self.cache[key] = value
38
59
 
39
- def flush(self):
60
+ def flush(self) -> None:
61
+ """Copy the contents of the cache to the final array."""
40
62
  for i in range(self.cache.shape[0]):
41
63
  global_i = self.indexes[i]
42
64
  self.array[global_i] = self.cache[i]
@@ -10,24 +10,56 @@
10
10
  import datetime
11
11
  import logging
12
12
  import shutil
13
+ from typing import Any
14
+ from typing import Optional
13
15
 
14
16
  import numpy as np
17
+ import zarr
18
+ from numpy.typing import NDArray
15
19
 
16
20
  LOG = logging.getLogger(__name__)
17
21
 
18
22
 
19
23
  def add_zarr_dataset(
20
24
  *,
21
- name,
22
- dtype=None,
23
- fill_value=None,
24
- zarr_root,
25
- shape=None,
26
- array=None,
27
- overwrite=True,
28
- dimensions=None,
25
+ name: str,
26
+ dtype: np.dtype = None,
27
+ fill_value: np.generic = None,
28
+ zarr_root: zarr.Group,
29
+ shape: tuple[int, ...] = None,
30
+ array: NDArray[Any] = None,
31
+ overwrite: bool = True,
32
+ dimensions: tuple[str, ...] = None,
29
33
  **kwargs,
30
- ):
34
+ ) -> zarr.Array:
35
+ """Add a dataset to a Zarr group.
36
+
37
+ Parameters
38
+ ----------
39
+ name : str
40
+ Name of the dataset.
41
+ dtype : np.dtype, optional
42
+ Data type of the dataset.
43
+ fill_value : np.generic, optional
44
+ Fill value for the dataset.
45
+ zarr_root : zarr.Group
46
+ Root Zarr group.
47
+ shape : tuple[int, ...], optional
48
+ Shape of the dataset.
49
+ array : NDArray[Any], optional
50
+ Array to initialize the dataset with.
51
+ overwrite : bool
52
+ Whether to overwrite existing dataset.
53
+ dimensions : tuple[str, ...]
54
+ Dimensions of the dataset.
55
+ **kwargs
56
+ Additional arguments for Zarr dataset creation.
57
+
58
+ Returns
59
+ -------
60
+ zarr.Array
61
+ The created Zarr array.
62
+ """
31
63
  assert dimensions is not None, "Please pass dimensions to add_zarr_dataset."
32
64
  assert isinstance(dimensions, (tuple, list))
33
65
 
@@ -80,13 +112,26 @@ def add_zarr_dataset(
80
112
 
81
113
 
82
114
  class ZarrBuiltRegistry:
115
+ """A class to manage the creation and access of Zarr datasets."""
116
+
83
117
  name_lengths = "lengths"
84
118
  name_flags = "flags"
85
119
  lengths = None
86
120
  flags = None
87
121
  z = None
88
122
 
89
- def __init__(self, path, synchronizer_path=None, use_threads=False):
123
+ def __init__(self, path: str, synchronizer_path: Optional[str] = None, use_threads: bool = False):
124
+ """Initialize the ZarrBuiltRegistry.
125
+
126
+ Parameters
127
+ ----------
128
+ path : str
129
+ Path to the Zarr store.
130
+ synchronizer_path : Optional[str], optional
131
+ Path to the synchronizer.
132
+ use_threads : bool
133
+ Whether to use thread-based synchronization.
134
+ """
90
135
  import zarr
91
136
 
92
137
  assert isinstance(path, str), path
@@ -101,19 +146,33 @@ class ZarrBuiltRegistry:
101
146
  self.synchronizer_path = synchronizer_path
102
147
  self.synchronizer = zarr.ProcessSynchronizer(self.synchronizer_path)
103
148
 
104
- def clean(self):
149
+ def clean(self) -> None:
150
+ """Clean up the synchronizer path."""
105
151
  if self.synchronizer_path is not None:
106
152
  try:
107
153
  shutil.rmtree(self.synchronizer_path)
108
154
  except FileNotFoundError:
109
155
  pass
110
156
 
111
- def _open_write(self):
157
+ def _open_write(self) -> zarr.Group:
158
+ """Open the Zarr store in write mode."""
112
159
  import zarr
113
160
 
114
161
  return zarr.open(self.zarr_path, mode="r+", synchronizer=self.synchronizer)
115
162
 
116
- def _open_read(self, sync=True):
163
+ def _open_read(self, sync: bool = True) -> zarr.Group:
164
+ """Open the Zarr store in read mode.
165
+
166
+ Parameters
167
+ ----------
168
+ sync : bool
169
+ Whether to use synchronization.
170
+
171
+ Returns
172
+ -------
173
+ zarr.Group
174
+ The opened Zarr group.
175
+ """
117
176
  import zarr
118
177
 
119
178
  if sync:
@@ -121,12 +180,30 @@ class ZarrBuiltRegistry:
121
180
  else:
122
181
  return zarr.open(self.zarr_path, mode="r")
123
182
 
124
- def new_dataset(self, *args, **kwargs):
183
+ def new_dataset(self, *args, **kwargs) -> None:
184
+ """Create a new dataset in the Zarr store.
185
+
186
+ Parameters
187
+ ----------
188
+ *args
189
+ Positional arguments for dataset creation.
190
+ **kwargs
191
+ Keyword arguments for dataset creation.
192
+ """
125
193
  z = self._open_write()
126
194
  zarr_root = z["_build"]
127
195
  add_zarr_dataset(*args, zarr_root=zarr_root, overwrite=True, dimensions=("tmp",), **kwargs)
128
196
 
129
- def add_to_history(self, action, **kwargs):
197
+ def add_to_history(self, action: str, **kwargs) -> None:
198
+ """Add an action to the history attribute of the Zarr store.
199
+
200
+ Parameters
201
+ ----------
202
+ action : str
203
+ The action to record.
204
+ **kwargs
205
+ Additional information about the action.
206
+ """
130
207
  new = dict(
131
208
  action=action,
132
209
  timestamp=datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
@@ -138,37 +215,107 @@ class ZarrBuiltRegistry:
138
215
  history.append(new)
139
216
  z.attrs["history"] = history
140
217
 
141
- def get_lengths(self):
218
+ def get_lengths(self) -> list[int]:
219
+ """Get the lengths dataset.
220
+
221
+ Returns
222
+ -------
223
+ list[int]
224
+ The lengths dataset.
225
+ """
142
226
  z = self._open_read()
143
227
  return list(z["_build"][self.name_lengths][:])
144
228
 
145
- def get_flags(self, **kwargs):
229
+ def get_flags(self, **kwargs) -> list[bool]:
230
+ """Get the flags dataset.
231
+
232
+ Parameters
233
+ ----------
234
+ **kwargs
235
+ Additional arguments for reading the dataset.
236
+
237
+ Returns
238
+ -------
239
+ list[bool]
240
+ The flags dataset.
241
+ """
146
242
  z = self._open_read(**kwargs)
147
243
  return list(z["_build"][self.name_flags][:])
148
244
 
149
- def get_flag(self, i):
245
+ def get_flag(self, i: int) -> bool:
246
+ """Get a specific flag.
247
+
248
+ Parameters
249
+ ----------
250
+ i : int
251
+ Index of the flag.
252
+
253
+ Returns
254
+ -------
255
+ bool
256
+ The flag value.
257
+ """
150
258
  z = self._open_read()
151
259
  return z["_build"][self.name_flags][i]
152
260
 
153
- def set_flag(self, i, value=True):
261
+ def set_flag(self, i: int, value: bool = True) -> None:
262
+ """Set a specific flag.
263
+
264
+ Parameters
265
+ ----------
266
+ i : int
267
+ Index of the flag.
268
+ value : bool
269
+ Value to set the flag to.
270
+ """
154
271
  z = self._open_write()
155
272
  z.attrs["latest_write_timestamp"] = (
156
273
  datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
157
274
  )
158
275
  z["_build"][self.name_flags][i] = value
159
276
 
160
- def ready(self):
277
+ def ready(self) -> bool:
278
+ """Check if all flags are set.
279
+
280
+ Returns
281
+ -------
282
+ bool
283
+ True if all flags are set, False otherwise.
284
+ """
161
285
  return all(self.get_flags())
162
286
 
163
- def create(self, lengths, overwrite=False):
287
+ def create(self, lengths: list[int], overwrite: bool = False) -> None:
288
+ """Create the lengths and flags datasets.
289
+
290
+ Parameters
291
+ ----------
292
+ lengths : list[int]
293
+ Lengths to initialize the dataset with.
294
+ overwrite : bool
295
+ Whether to overwrite existing datasets.
296
+ """
164
297
  self.new_dataset(name=self.name_lengths, array=np.array(lengths, dtype="i4"))
165
298
  self.new_dataset(name=self.name_flags, array=np.array([False] * len(lengths), dtype=bool))
166
299
  self.add_to_history("initialised")
167
300
 
168
- def reset(self, lengths):
301
+ def reset(self, lengths: list[int]) -> None:
302
+ """Reset the lengths and flags datasets.
303
+
304
+ Parameters
305
+ ----------
306
+ lengths : list[int]
307
+ Lengths to initialize the dataset with.
308
+ """
169
309
  return self.create(lengths, overwrite=True)
170
310
 
171
- def add_provenance(self, name):
311
+ def add_provenance(self, name: str) -> None:
312
+ """Add provenance information to the Zarr store.
313
+
314
+ Parameters
315
+ ----------
316
+ name : str
317
+ Name of the provenance attribute.
318
+ """
172
319
  z = self._open_write()
173
320
 
174
321
  if name in z.attrs:
@@ -8,11 +8,20 @@
8
8
  # nor does it submit to any jurisdiction.
9
9
 
10
10
  import logging
11
+ from typing import TYPE_CHECKING
12
+ from typing import Any
13
+ from typing import Set
11
14
 
15
+ # from .dataset import FullIndex
16
+ # from .dataset import Shape
17
+ # from .dataset import TupleIndex
12
18
  from .misc import _open_dataset
13
19
  from .misc import add_dataset_path
14
20
  from .misc import add_named_dataset
15
21
 
22
+ if TYPE_CHECKING:
23
+ from .dataset import Dataset
24
+
16
25
  LOG = logging.getLogger(__name__)
17
26
 
18
27
  __all__ = [
@@ -27,8 +36,19 @@ class MissingDateError(Exception):
27
36
  pass
28
37
 
29
38
 
30
- def _convert(x):
39
+ def _convert(x: Any) -> Any:
40
+ """Convert OmegaConf objects to standard Python containers.
41
+
42
+ Parameters
43
+ ----------
44
+ x : Any
45
+ The object to convert.
31
46
 
47
+ Returns
48
+ -------
49
+ Any
50
+ The converted object.
51
+ """
32
52
  if isinstance(x, list):
33
53
  return [_convert(a) for a in x]
34
54
 
@@ -46,8 +66,21 @@ def _convert(x):
46
66
  return x
47
67
 
48
68
 
49
- def open_dataset(*args, **kwargs):
69
+ def open_dataset(*args: Any, **kwargs: Any) -> "Dataset":
70
+ """Open a dataset.
71
+
72
+ Parameters
73
+ ----------
74
+ *args : Any
75
+ Positional arguments.
76
+ **kwargs : Any
77
+ Keyword arguments.
50
78
 
79
+ Returns
80
+ -------
81
+ Dataset
82
+ The opened dataset.
83
+ """
51
84
  # That will get rid of OmegaConf objects
52
85
 
53
86
  args, kwargs = _convert(args), _convert(kwargs)
@@ -59,8 +92,22 @@ def open_dataset(*args, **kwargs):
59
92
  return ds
60
93
 
61
94
 
62
- def list_dataset_names(*args, **kwargs):
95
+ def list_dataset_names(*args: Any, **kwargs: Any) -> list[str]:
96
+ """List the names of datasets.
97
+
98
+ Parameters
99
+ ----------
100
+ *args : Any
101
+ Positional arguments.
102
+ **kwargs : Any
103
+ Keyword arguments.
104
+
105
+ Returns
106
+ -------
107
+ list of str
108
+ The list of dataset names.
109
+ """
63
110
  ds = _open_dataset(*args, **kwargs)
64
- names = set()
111
+ names: Set[str] = set()
65
112
  ds.get_dataset_names(names)
66
113
  return sorted(names)