anemoi-datasets 0.5.7__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. anemoi/datasets/__init__.py +11 -3
  2. anemoi/datasets/__main__.py +2 -3
  3. anemoi/datasets/_version.py +2 -2
  4. anemoi/datasets/commands/__init__.py +2 -3
  5. anemoi/datasets/commands/cleanup.py +9 -0
  6. anemoi/datasets/commands/compare.py +3 -3
  7. anemoi/datasets/commands/copy.py +38 -68
  8. anemoi/datasets/commands/create.py +20 -5
  9. anemoi/datasets/commands/finalise-additions.py +9 -0
  10. anemoi/datasets/commands/finalise.py +9 -0
  11. anemoi/datasets/commands/init-additions.py +9 -0
  12. anemoi/datasets/commands/init.py +9 -0
  13. anemoi/datasets/commands/inspect.py +3 -1
  14. anemoi/datasets/commands/load-additions.py +9 -0
  15. anemoi/datasets/commands/load.py +9 -0
  16. anemoi/datasets/commands/patch.py +9 -0
  17. anemoi/datasets/commands/publish.py +9 -0
  18. anemoi/datasets/commands/scan.py +9 -0
  19. anemoi/datasets/compute/__init__.py +8 -0
  20. anemoi/datasets/compute/recentre.py +3 -2
  21. anemoi/datasets/create/__init__.py +62 -12
  22. anemoi/datasets/create/check.py +4 -3
  23. anemoi/datasets/create/chunks.py +3 -2
  24. anemoi/datasets/create/config.py +5 -5
  25. anemoi/datasets/create/functions/__init__.py +22 -7
  26. anemoi/datasets/create/functions/filters/__init__.py +2 -1
  27. anemoi/datasets/create/functions/filters/empty.py +3 -2
  28. anemoi/datasets/create/functions/filters/noop.py +2 -2
  29. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
  30. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
  31. anemoi/datasets/create/functions/filters/rename.py +16 -11
  32. anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
  33. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
  34. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
  35. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
  36. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
  37. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
  38. anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
  39. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
  40. anemoi/datasets/create/functions/sources/__init__.py +2 -2
  41. anemoi/datasets/create/functions/sources/accumulations.py +10 -4
  42. anemoi/datasets/create/functions/sources/constants.py +3 -2
  43. anemoi/datasets/create/functions/sources/empty.py +3 -2
  44. anemoi/datasets/create/functions/sources/forcings.py +3 -2
  45. anemoi/datasets/create/functions/sources/grib.py +8 -2
  46. anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
  47. anemoi/datasets/create/functions/sources/mars.py +97 -17
  48. anemoi/datasets/create/functions/sources/netcdf.py +3 -2
  49. anemoi/datasets/create/functions/sources/opendap.py +2 -2
  50. anemoi/datasets/create/functions/sources/recentre.py +3 -2
  51. anemoi/datasets/create/functions/sources/source.py +3 -2
  52. anemoi/datasets/create/functions/sources/tendencies.py +3 -2
  53. anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -3
  54. anemoi/datasets/create/functions/sources/xarray/coordinates.py +3 -2
  55. anemoi/datasets/create/functions/sources/xarray/field.py +6 -5
  56. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -4
  57. anemoi/datasets/create/functions/sources/xarray/flavour.py +2 -2
  58. anemoi/datasets/create/functions/sources/xarray/grid.py +2 -2
  59. anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
  60. anemoi/datasets/create/functions/sources/xarray/time.py +2 -2
  61. anemoi/datasets/create/functions/sources/xarray/variable.py +6 -9
  62. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
  63. anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
  64. anemoi/datasets/create/functions/sources/zenodo.py +2 -2
  65. anemoi/datasets/create/input/__init__.py +3 -17
  66. anemoi/datasets/create/input/action.py +3 -8
  67. anemoi/datasets/create/input/concat.py +3 -2
  68. anemoi/datasets/create/input/context.py +3 -8
  69. anemoi/datasets/create/input/data_sources.py +3 -9
  70. anemoi/datasets/create/input/empty.py +3 -9
  71. anemoi/datasets/create/input/filter.py +3 -9
  72. anemoi/datasets/create/input/function.py +3 -9
  73. anemoi/datasets/create/input/join.py +3 -2
  74. anemoi/datasets/create/input/misc.py +3 -8
  75. anemoi/datasets/create/input/pipe.py +9 -3
  76. anemoi/datasets/create/input/repeated_dates.py +14 -8
  77. anemoi/datasets/create/input/result.py +154 -12
  78. anemoi/datasets/create/input/step.py +4 -9
  79. anemoi/datasets/create/input/template.py +3 -2
  80. anemoi/datasets/create/input/trace.py +3 -2
  81. anemoi/datasets/create/patch.py +9 -1
  82. anemoi/datasets/create/persistent.py +3 -2
  83. anemoi/datasets/create/size.py +3 -2
  84. anemoi/datasets/create/statistics/__init__.py +3 -2
  85. anemoi/datasets/create/statistics/summary.py +3 -2
  86. anemoi/datasets/create/utils.py +15 -2
  87. anemoi/datasets/create/writer.py +3 -2
  88. anemoi/datasets/create/zarr.py +3 -2
  89. anemoi/datasets/data/__init__.py +27 -1
  90. anemoi/datasets/data/concat.py +5 -1
  91. anemoi/datasets/data/dataset.py +216 -37
  92. anemoi/datasets/data/debug.py +4 -1
  93. anemoi/datasets/data/ensemble.py +4 -1
  94. anemoi/datasets/data/fill_missing.py +165 -0
  95. anemoi/datasets/data/forwards.py +23 -1
  96. anemoi/datasets/data/grids.py +236 -58
  97. anemoi/datasets/data/indexing.py +4 -1
  98. anemoi/datasets/data/interpolate.py +4 -1
  99. anemoi/datasets/data/join.py +12 -9
  100. anemoi/datasets/data/masked.py +36 -10
  101. anemoi/datasets/data/merge.py +180 -0
  102. anemoi/datasets/data/misc.py +18 -3
  103. anemoi/datasets/data/missing.py +4 -1
  104. anemoi/datasets/data/rescale.py +4 -1
  105. anemoi/datasets/data/select.py +4 -1
  106. anemoi/datasets/data/statistics.py +4 -1
  107. anemoi/datasets/data/stores.py +66 -3
  108. anemoi/datasets/data/subset.py +6 -1
  109. anemoi/datasets/data/unchecked.py +4 -1
  110. anemoi/datasets/data/xy.py +20 -5
  111. anemoi/datasets/dates/__init__.py +9 -7
  112. anemoi/datasets/dates/groups.py +4 -2
  113. anemoi/datasets/grids.py +86 -2
  114. anemoi/datasets/testing.py +3 -2
  115. anemoi/datasets/utils/__init__.py +8 -0
  116. anemoi/datasets/utils/fields.py +2 -2
  117. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/METADATA +11 -29
  118. anemoi_datasets-0.5.11.dist-info/RECORD +123 -0
  119. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/WHEEL +1 -1
  120. anemoi/datasets/fields.py +0 -66
  121. anemoi_datasets-0.5.7.dist-info/RECORD +0 -122
  122. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/LICENSE +0 -0
  123. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/entry_points.txt +0 -0
  124. {anemoi_datasets-0.5.7.dist-info → anemoi_datasets-0.5.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,180 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ import logging
12
+ from functools import cached_property
13
+
14
+ import numpy as np
15
+
16
+ from . import MissingDateError
17
+ from .debug import Node
18
+ from .debug import debug_indexing
19
+ from .forwards import Combined
20
+ from .indexing import apply_index_to_slices_changes
21
+ from .indexing import expand_list_indexing
22
+ from .indexing import index_to_slices
23
+ from .indexing import update_tuple
24
+ from .misc import _auto_adjust
25
+ from .misc import _open
26
+
27
+ LOG = logging.getLogger(__name__)
28
+
29
+
30
class Merge(Combined):
    """Interleave several datasets along the dates axis.

    For example, datasets providing::

        d0    d2    d4    d6 ...
           d1    d3    d5    d7 ...

    are exposed as a single dataset ``d0 d1 d2 d3 ...``.
    """

    def __init__(self, datasets, allow_gaps_in_dates=False):
        """Build the lookup from merged dates to (dataset index, row index).

        Parameters
        ----------
        datasets : list
            The datasets to merge.
        allow_gaps_in_dates : bool, optional
            When true, a date of the regular grid between the first and the
            last date that no dataset provides is recorded as missing
            instead of raising.

        Raises
        ------
        ValueError
            If two datasets both provide a non-missing entry for the same
            date, if fewer than two distinct dates are available (the
            frequency cannot be derived), or if a date of the grid is not
            covered while ``allow_gaps_in_dates`` is false.
        """
        super().__init__(datasets)

        self.allow_gaps_in_dates = allow_gaps_in_dates

        dates = {}  # date -> (dataset_index, date_index)

        for i, d in enumerate(datasets):
            for j, date in enumerate(d.dates):
                date = date.astype(object)

                if date not in dates:
                    dates[date] = (i, j)
                    continue

                selected = datasets[dates[date][0]]

                # The new entry is flagged missing: keep the selected one.
                if j in d.missing:
                    continue

                # The selected entry is flagged missing: the new one wins.
                if dates[date][1] in selected.missing:
                    dates[date] = (i, j)
                    continue

                raise ValueError(f"Duplicate date {date} found in datasets {selected} and {d}")

        all_dates = sorted(dates)

        # The frequency is the smallest step between consecutive dates, so at
        # least two distinct dates are required; fail with a clear message
        # rather than an opaque "min() arg is an empty sequence".
        if len(all_dates) < 2:
            raise ValueError(f"merge: cannot determine frequency with less than two dates ({all_dates}).")

        start = all_dates[0]
        end = all_dates[-1]

        frequency = min(d2 - d1 for d1, d2 in zip(all_dates[:-1], all_dates[1:]))

        # Pseudo dataset index used to tag dates that no dataset provides.
        self._missing_index = len(datasets)

        indices = []
        _dates = []

        date = start
        while date <= end:
            if date not in dates:
                if not self.allow_gaps_in_dates:
                    raise ValueError(
                        f"merge: date {date} not covered by dataset. Start={start}, end={end}, frequency={frequency}"
                    )
                dates[date] = (self._missing_index, -1)

            indices.append(dates[date])
            _dates.append(date)
            date += frequency

        self._dates = np.array(_dates, dtype="datetime64[s]")
        self._indices = np.array(indices)
        self._frequency = frequency

    def __len__(self):
        """Return the number of dates of the merged dataset."""
        return len(self._dates)

    @property
    def dates(self):
        """The merged dates, as a ``datetime64[s]`` array."""
        return self._dates

    @property
    def frequency(self):
        """The smallest step found between two consecutive merged dates."""
        return self._frequency

    @cached_property
    def missing(self):
        """Set of merged indices that are gaps or are missing in their source dataset."""
        # TODO: optimize
        result = set()

        for i, (dataset, row) in enumerate(self._indices):
            if dataset == self._missing_index or row in self.datasets[dataset].missing:
                result.add(i)

        return result

    def check_same_lengths(self, d1, d2):
        # Turned off because we are concatenating along the first axis
        pass

    def check_same_dates(self, d1, d2):
        # Turned off because we are concatenating along the dates axis
        pass

    def check_compatibility(self, d1, d2):
        """Run the inherited checks, then compare sub-shapes ignoring axis 0."""
        super().check_compatibility(d1, d2)
        self.check_same_sub_shapes(d1, d2, drop_axis=0)

    def tree(self):
        """Return a ``Node`` for this dataset, with one child per merged dataset."""
        return Node(self, [d.tree() for d in self.datasets], allow_gaps_in_dates=self.allow_gaps_in_dates)

    @debug_indexing
    def __getitem__(self, n):
        """Return the data at index ``n`` (an int, a slice or a tuple).

        Raises
        ------
        MissingDateError
            If ``n`` is an integer index designating a gap in the dates.
        """
        if isinstance(n, tuple):
            return self._get_tuple(n)

        if isinstance(n, slice):
            return self._get_slice(n)

        dataset, row = self._indices[n]

        if dataset == self._missing_index:
            raise MissingDateError(f"Date {self.dates[n]} is missing (index={n})")

        return self.datasets[dataset][int(row)]

    @debug_indexing
    @expand_list_indexing
    def _get_tuple(self, index):
        """Handle tuple indexing: slice along the first axis, then apply the rest."""
        index, changes = index_to_slices(index, self.shape)
        index, previous = update_tuple(index, 0, slice(None))
        result = self._get_slice(previous)
        return apply_index_to_slices_changes(result[index], changes)

    def _get_slice(self, s):
        """Stack the items selected by slice ``s`` into a single array."""
        return np.stack([self[i] for i in range(*s.indices(self._len))])
162
+
163
+
164
def merge_factory(args, kwargs):
    """Create a merged dataset from the ``merge`` entry of ``kwargs``.

    Parameters
    ----------
    args : tuple
        Positional arguments; must be empty.
    kwargs : dict
        Must contain ``merge`` (a list or tuple of dataset specifications).
        May contain ``allow_gaps_in_dates``. The remaining entries are
        forwarded to ``_subset()``. Both keys are removed from ``kwargs``.

    Returns
    -------
    The result of ``_subset(**kwargs)`` applied to the single dataset when
    only one is given, otherwise to the ``Merge`` of all of them.
    """
    datasets = kwargs.pop("merge")

    assert isinstance(datasets, (list, tuple))
    assert len(args) == 0

    # Consume the Merge-only option before the single-dataset shortcut, so it
    # is never forwarded to `_subset()` (which is not expected to understand
    # it -- NOTE(review): confirm against Dataset._subset).
    allow_gaps_in_dates = kwargs.pop("allow_gaps_in_dates", False)

    datasets = [_open(e) for e in datasets]

    # Nothing to merge with a single dataset.
    if len(datasets) == 1:
        return datasets[0]._subset(**kwargs)

    datasets, kwargs = _auto_adjust(datasets, kwargs)

    return Merge(datasets, allow_gaps_in_dates=allow_gaps_in_dates)._subset(**kwargs)
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import calendar
9
12
  import datetime
10
13
  import logging
@@ -235,16 +238,21 @@ def _auto_adjust(datasets, kwargs):
235
238
  if set(d.variables) != variables:
236
239
  subset_kwargs[i]["select"] = sorted(variables)
237
240
 
241
+ if "start" or "end" in adjust_set:
242
+ common = datasets[0].dates
243
+ for d in datasets[0:]:
244
+ common = np.intersect1d(common, d.dates)
245
+
238
246
  if "start" in adjust_set:
239
247
  assert "start" not in kwargs, "Cannot use 'start' in adjust and kwargs"
240
- start = max(d.dates[0] for d in datasets).astype(object)
248
+ start = min(common).astype(object)
241
249
  for i, d in enumerate(datasets):
242
250
  if start != d.dates[0]:
243
251
  subset_kwargs[i]["start"] = start
244
252
 
245
253
  if "end" in adjust_set:
246
254
  assert "end" not in kwargs, "Cannot use 'end' in adjust and kwargs"
247
- end = min(d.dates[-1] for d in datasets).astype(object)
255
+ end = max(common).astype(object)
248
256
  for i, d in enumerate(datasets):
249
257
  if end != d.dates[-1]:
250
258
  subset_kwargs[i]["end"] = end
@@ -262,6 +270,7 @@ def _auto_adjust(datasets, kwargs):
262
270
 
263
271
 
264
272
  def _open_dataset(*args, **kwargs):
273
+
265
274
  sets = []
266
275
  for a in args:
267
276
  sets.append(_open(a))
@@ -302,6 +311,12 @@ def _open_dataset(*args, **kwargs):
302
311
  assert not sets, sets
303
312
  return concat_factory(args, kwargs).mutate()
304
313
 
314
+ if "merge" in kwargs:
315
+ from .merge import merge_factory
316
+
317
+ assert not sets, sets
318
+ return merge_factory(args, kwargs).mutate()
319
+
305
320
  if "ensemble" in kwargs:
306
321
  from .ensemble import ensemble_factory
307
322
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -69,7 +71,7 @@ class S3Store(ReadOnlyStore):
69
71
  """
70
72
 
71
73
  def __init__(self, url, region=None):
72
- from anemoi.utils.s3 import s3_client
74
+ from anemoi.utils.remote.s3 import s3_client
73
75
 
74
76
  _, _, self.bucket, self.key = url.split("/", 3)
75
77
  self.s3 = s3_client(self.bucket, region=region)
@@ -83,6 +85,41 @@ class S3Store(ReadOnlyStore):
83
85
  return response["Body"].read()
84
86
 
85
87
 
88
class PlanetaryComputerStore(ReadOnlyStore):
    """Store used to access catalogs on Planetary Computer.

    We write our own store because opening these catalogs requires some
    extra arguments to use xr.open_zarr.
    """

    def __init__(self, data_catalog_id):
        """Remember the identifier of the collection to look up."""
        self.data_catalog_id = data_catalog_id

    def __getitem__(self):
        """Return a dict of arguments describing how to open the collection.

        The dict is built from the ``zarr-abfs`` asset of the collection:
        its ``href`` plus the asset's ``xarray:open_kwargs`` and, when
        present, its ``xarray:storage_options``.
        """
        # Imported here so the packages are only needed when this store is used.
        import planetary_computer
        import pystac_client

        catalog = pystac_client.Client.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1/",
            modifier=planetary_computer.sign_inplace,
        )
        asset = catalog.get_collection(self.data_catalog_id).assets["zarr-abfs"]
        extra = asset.extra_fields

        if "xarray:storage_options" not in extra:
            return {
                "filename_or_obj": asset.href,
                **extra["xarray:open_kwargs"],
            }

        return {
            "store": asset.href,
            "storage_options": extra["xarray:storage_options"],
            **extra["xarray:open_kwargs"],
        }
121
+
122
+
86
123
  class DebugStore(ReadOnlyStore):
87
124
  """A store to debug the zarr loading."""
88
125
 
@@ -119,6 +156,9 @@ def name_to_zarr_store(path_or_url):
119
156
  if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
120
157
  s3_url = f"s3://{bits[0]}{parsed.path}"
121
158
  store = S3Store(s3_url, region=bits[2])
159
+ elif store.startswith("https://planetarycomputer.microsoft.com/"):
160
+ data_catalog_id = store.rsplit("/", 1)[-1]
161
+ store = PlanetaryComputerStore(data_catalog_id).__getitem__()
122
162
  else:
123
163
  store = HTTPStore(store)
124
164
 
@@ -302,6 +342,13 @@ class Zarr(Dataset):
302
342
  )
303
343
  ]
304
344
 
345
@cached_property
def constant_fields(self):
    """Return the constant fields of the dataset.

    The value stored in the ``constant_fields`` attribute of the zarr is
    returned when present. Otherwise a warning is logged and the result of
    ``computed_constant_fields()`` is returned instead.
    """
    result = self.z.attrs.get("constant_fields")
    if result is None:
        LOG.warning("No 'constant_fields' attribute in %r, computing them", self)
        return self.computed_constant_fields()
    # Use the stored value rather than discarding it.
    return result
351
+
305
352
  @property
306
353
  def variables_metadata(self):
307
354
  return self.z.attrs.get("variables_metadata", {})
@@ -317,6 +364,7 @@ class Zarr(Dataset):
317
364
  attrs=dict(self.z.attrs),
318
365
  chunks=self.chunks,
319
366
  dtype=str(self.dtype),
367
+ path=self.path,
320
368
  )
321
369
 
322
370
  def source(self, index):
@@ -335,6 +383,12 @@ class Zarr(Dataset):
335
383
  name, _ = os.path.splitext(os.path.basename(self.path))
336
384
  names.add(name)
337
385
 
386
def collect_supporting_arrays(self, collected, *path):
    """Do nothing: this class adds no entry to ``collected``."""
    return None
388
+
389
def collect_input_sources(self, collected):
    """Do nothing: this class adds no entry to ``collected``."""
    return None
391
+
338
392
 
339
393
  class ZarrWithMissingDates(Zarr):
340
394
  """A zarr dataset with missing dates."""
@@ -343,7 +397,7 @@ class ZarrWithMissingDates(Zarr):
343
397
  super().__init__(path)
344
398
 
345
399
  missing_dates = self.z.attrs.get("missing_dates", [])
346
- missing_dates = set([np.datetime64(x) for x in missing_dates])
400
+ missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
347
401
  self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
348
402
  self.missing = set(self.missing_to_dates)
349
403
 
@@ -396,6 +450,9 @@ class ZarrWithMissingDates(Zarr):
396
450
  return "zarr*"
397
451
 
398
452
 
453
+ QUIET = set()
454
+
455
+
399
456
  def zarr_lookup(name, fail=True):
400
457
 
401
458
  if name.endswith(".zarr") or name.endswith(".zip"):
@@ -404,6 +461,9 @@ def zarr_lookup(name, fail=True):
404
461
  config = load_config()["datasets"]
405
462
 
406
463
  if name in config["named"]:
464
+ if name not in QUIET:
465
+ LOG.info("Opening `%s` as `%s`", name, config["named"][name])
466
+ QUIET.add(name)
407
467
  return config["named"][name]
408
468
 
409
469
  tried = []
@@ -417,6 +477,9 @@ def zarr_lookup(name, fail=True):
417
477
  if z is not None:
418
478
  # Cache for next time
419
479
  config["named"][name] = full
480
+ if name not in QUIET:
481
+ LOG.info("Opening `%s` as `%s`", name, full)
482
+ QUIET.add(name)
420
483
  return full
421
484
  except zarr.errors.PathNotFoundError:
422
485
  pass
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -132,6 +135,8 @@ class Subset(Forwards):
132
135
  @cached_property
133
136
  def frequency(self):
134
137
  dates = self.dates
138
+ if len(dates) < 2:
139
+ raise ValueError(f"Cannot determine frequency of a subset with less than two dates ({self.dates}).")
135
140
  return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
136
141
 
137
142
  def source(self, index):
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
  from functools import wraps
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -18,15 +21,19 @@ LOG = logging.getLogger(__name__)
18
21
 
19
22
  class ZipBase(Combined):
20
23
 
24
def __init__(self, datasets, check_compatibility=True):
    """Store the compatibility flag, then initialise the parent with ``datasets``.

    ``check_compatibility`` is kept on the instance as ``_check_compatibility``.
    """
    # The flag is set before calling the parent constructor.
    # NOTE(review): presumably because the parent may call
    # `check_compatibility()` while initialising -- confirm in `Combined`.
    self._check_compatibility = check_compatibility
    super().__init__(datasets=datasets) if False else super().__init__(datasets)
27
+
21
28
  def swap_with_parent(self, parent):
22
29
  new_parents = [parent.clone(ds) for ds in self.datasets]
23
30
  return self.clone(new_parents)
24
31
 
25
32
  def clone(self, datasets):
26
- return self.__class__(datasets)
33
+ return self.__class__(datasets, check_compatibility=self._check_compatibility)
27
34
 
28
35
  def tree(self):
29
- return Node(self, [d.tree() for d in self.datasets])
36
+ return Node(self, [d.tree() for d in self.datasets], check_compatibility=self._check_compatibility)
30
37
 
31
38
  def __len__(self):
32
39
  return min(len(d) for d in self.datasets)
@@ -86,6 +93,10 @@ class ZipBase(Combined):
86
93
  def name_to_index(self):
87
94
  return tuple(d.name_to_index for d in self.datasets)
88
95
 
96
def check_compatibility(self, d1, d2):
    """Run the inherited compatibility check unless it has been disabled.

    Nothing is checked when ``self._check_compatibility`` is false.
    """
    if not self._check_compatibility:
        return
    super().check_compatibility(d1, d2)
99
+
89
100
 
90
101
  class Zip(ZipBase):
91
102
  pass
@@ -110,7 +121,9 @@ def xy_factory(args, kwargs):
110
121
 
111
122
  assert len(datasets) == 2
112
123
 
113
- return XY(datasets)._subset(**kwargs)
124
+ check_compatibility = kwargs.pop("check_compatibility", True)
125
+
126
+ return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
114
127
 
115
128
 
116
129
  def zip_factory(args, kwargs):
@@ -122,4 +135,6 @@ def zip_factory(args, kwargs):
122
135
  datasets = [_open(e) for e in zip]
123
136
  datasets, kwargs = _auto_adjust(datasets, kwargs)
124
137
 
125
- return Zip(datasets)._subset(**kwargs)
138
+ check_compatibility = kwargs.pop("check_compatibility", True)
139
+
140
+ return Zip(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -8,6 +10,8 @@
8
10
 
9
11
  import datetime
10
12
  import warnings
13
+ from functools import reduce
14
+ from math import gcd
11
15
 
12
16
  # from anemoi.utils.dates import as_datetime
13
17
  from anemoi.utils.dates import DateTimes
@@ -193,18 +197,16 @@ class HindcastsDates(DatesProvider):
193
197
 
194
198
  dates = sorted(dates)
195
199
 
196
- mindelta = None
200
+ deltas = set()
197
201
  for a, b in zip(dates, dates[1:]):
198
202
  delta = b - a
199
203
  assert isinstance(delta, datetime.timedelta), delta
200
- if mindelta is None:
201
- mindelta = delta
202
- else:
203
- mindelta = min(mindelta, delta)
204
+ deltas.add(delta)
204
205
 
206
+ mindelta_seconds = reduce(gcd, [int(delta.total_seconds()) for delta in deltas])
207
+ mindelta = datetime.timedelta(seconds=mindelta_seconds)
205
208
  self.frequency = mindelta
206
209
  assert mindelta.total_seconds() > 0, mindelta
207
-
208
210
  print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
209
211
 
210
212
  # Use all values between start and end by frequency, and set the ones that are missing
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -26,7 +28,7 @@ class GroupOfDates:
26
28
  assert isinstance(provider, DatesProvider), type(provider)
27
29
  assert isinstance(dates, list)
28
30
 
29
- self.dates = dates
31
+ self.dates = [as_datetime(_) for _ in dates]
30
32
  self.provider = provider
31
33
  self.partial_ok = partial_ok
32
34