anemoi-datasets 0.5.6__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. anemoi/datasets/__init__.py +11 -3
  2. anemoi/datasets/__main__.py +2 -3
  3. anemoi/datasets/_version.py +2 -2
  4. anemoi/datasets/commands/__init__.py +2 -3
  5. anemoi/datasets/commands/cleanup.py +9 -0
  6. anemoi/datasets/commands/compare.py +3 -3
  7. anemoi/datasets/commands/copy.py +38 -68
  8. anemoi/datasets/commands/create.py +20 -5
  9. anemoi/datasets/commands/finalise-additions.py +9 -0
  10. anemoi/datasets/commands/finalise.py +9 -0
  11. anemoi/datasets/commands/init-additions.py +9 -0
  12. anemoi/datasets/commands/init.py +9 -0
  13. anemoi/datasets/commands/inspect.py +7 -1
  14. anemoi/datasets/commands/load-additions.py +9 -0
  15. anemoi/datasets/commands/load.py +9 -0
  16. anemoi/datasets/commands/patch.py +9 -0
  17. anemoi/datasets/commands/publish.py +9 -0
  18. anemoi/datasets/commands/scan.py +9 -0
  19. anemoi/datasets/compute/__init__.py +8 -0
  20. anemoi/datasets/compute/recentre.py +3 -2
  21. anemoi/datasets/create/__init__.py +64 -48
  22. anemoi/datasets/create/check.py +4 -3
  23. anemoi/datasets/create/chunks.py +3 -2
  24. anemoi/datasets/create/config.py +5 -5
  25. anemoi/datasets/create/functions/__init__.py +22 -7
  26. anemoi/datasets/create/functions/filters/__init__.py +2 -1
  27. anemoi/datasets/create/functions/filters/empty.py +3 -2
  28. anemoi/datasets/create/functions/filters/noop.py +2 -2
  29. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
  30. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
  31. anemoi/datasets/create/functions/filters/rename.py +16 -10
  32. anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
  33. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
  34. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
  35. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
  36. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
  37. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
  38. anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
  39. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
  40. anemoi/datasets/create/functions/sources/__init__.py +2 -2
  41. anemoi/datasets/create/functions/sources/accumulations.py +10 -4
  42. anemoi/datasets/create/functions/sources/constants.py +3 -2
  43. anemoi/datasets/create/functions/sources/empty.py +3 -2
  44. anemoi/datasets/create/functions/sources/forcings.py +3 -2
  45. anemoi/datasets/create/functions/sources/grib.py +2 -2
  46. anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
  47. anemoi/datasets/create/functions/sources/mars.py +97 -17
  48. anemoi/datasets/create/functions/sources/netcdf.py +3 -2
  49. anemoi/datasets/create/functions/sources/opendap.py +2 -2
  50. anemoi/datasets/create/functions/sources/recentre.py +3 -2
  51. anemoi/datasets/create/functions/sources/source.py +3 -2
  52. anemoi/datasets/create/functions/sources/tendencies.py +3 -2
  53. anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -2
  54. anemoi/datasets/create/functions/sources/xarray/coordinates.py +5 -2
  55. anemoi/datasets/create/functions/sources/xarray/field.py +3 -2
  56. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -2
  57. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -16
  58. anemoi/datasets/create/functions/sources/xarray/grid.py +3 -2
  59. anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
  60. anemoi/datasets/create/functions/sources/xarray/time.py +39 -4
  61. anemoi/datasets/create/functions/sources/xarray/variable.py +6 -6
  62. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
  63. anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
  64. anemoi/datasets/create/functions/sources/zenodo.py +2 -2
  65. anemoi/datasets/create/input/__init__.py +3 -17
  66. anemoi/datasets/create/input/action.py +3 -2
  67. anemoi/datasets/create/input/concat.py +3 -2
  68. anemoi/datasets/create/input/context.py +3 -2
  69. anemoi/datasets/create/input/data_sources.py +3 -2
  70. anemoi/datasets/create/input/empty.py +3 -2
  71. anemoi/datasets/create/input/filter.py +3 -2
  72. anemoi/datasets/create/input/function.py +3 -2
  73. anemoi/datasets/create/input/join.py +3 -2
  74. anemoi/datasets/create/input/misc.py +3 -2
  75. anemoi/datasets/create/input/pipe.py +3 -2
  76. anemoi/datasets/create/input/repeated_dates.py +3 -2
  77. anemoi/datasets/create/input/result.py +187 -3
  78. anemoi/datasets/create/input/step.py +4 -2
  79. anemoi/datasets/create/input/template.py +3 -2
  80. anemoi/datasets/create/input/trace.py +3 -2
  81. anemoi/datasets/create/patch.py +9 -1
  82. anemoi/datasets/create/persistent.py +7 -3
  83. anemoi/datasets/create/size.py +3 -2
  84. anemoi/datasets/create/statistics/__init__.py +7 -3
  85. anemoi/datasets/create/statistics/summary.py +3 -2
  86. anemoi/datasets/create/utils.py +15 -2
  87. anemoi/datasets/create/writer.py +3 -2
  88. anemoi/datasets/create/zarr.py +8 -3
  89. anemoi/datasets/data/__init__.py +27 -1
  90. anemoi/datasets/data/concat.py +5 -1
  91. anemoi/datasets/data/dataset.py +216 -37
  92. anemoi/datasets/data/debug.py +4 -1
  93. anemoi/datasets/data/ensemble.py +4 -1
  94. anemoi/datasets/data/fill_missing.py +165 -0
  95. anemoi/datasets/data/forwards.py +27 -2
  96. anemoi/datasets/data/grids.py +236 -58
  97. anemoi/datasets/data/indexing.py +4 -1
  98. anemoi/datasets/data/interpolate.py +4 -1
  99. anemoi/datasets/data/join.py +17 -1
  100. anemoi/datasets/data/masked.py +36 -10
  101. anemoi/datasets/data/merge.py +180 -0
  102. anemoi/datasets/data/misc.py +18 -3
  103. anemoi/datasets/data/missing.py +4 -1
  104. anemoi/datasets/data/rescale.py +4 -1
  105. anemoi/datasets/data/select.py +15 -1
  106. anemoi/datasets/data/statistics.py +4 -1
  107. anemoi/datasets/data/stores.py +70 -3
  108. anemoi/datasets/data/subset.py +6 -1
  109. anemoi/datasets/data/unchecked.py +9 -1
  110. anemoi/datasets/data/xy.py +20 -5
  111. anemoi/datasets/dates/__init__.py +9 -7
  112. anemoi/datasets/dates/groups.py +3 -1
  113. anemoi/datasets/fields.py +3 -1
  114. anemoi/datasets/grids.py +86 -2
  115. anemoi/datasets/testing.py +60 -0
  116. anemoi/datasets/utils/__init__.py +8 -0
  117. anemoi/datasets/utils/fields.py +2 -2
  118. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/METADATA +11 -29
  119. anemoi_datasets-0.5.10.dist-info/RECORD +124 -0
  120. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/WHEEL +1 -1
  121. anemoi_datasets-0.5.6.dist-info/RECORD +0 -121
  122. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/LICENSE +0 -0
  123. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/entry_points.txt +0 -0
  124. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import calendar
9
12
  import datetime
10
13
  import logging
@@ -235,16 +238,21 @@ def _auto_adjust(datasets, kwargs):
235
238
  if set(d.variables) != variables:
236
239
  subset_kwargs[i]["select"] = sorted(variables)
237
240
 
241
+ if "start" or "end" in adjust_set:
242
+ common = datasets[0].dates
243
+ for d in datasets[0:]:
244
+ common = np.intersect1d(common, d.dates)
245
+
238
246
  if "start" in adjust_set:
239
247
  assert "start" not in kwargs, "Cannot use 'start' in adjust and kwargs"
240
- start = max(d.dates[0] for d in datasets).astype(object)
248
+ start = min(common).astype(object)
241
249
  for i, d in enumerate(datasets):
242
250
  if start != d.dates[0]:
243
251
  subset_kwargs[i]["start"] = start
244
252
 
245
253
  if "end" in adjust_set:
246
254
  assert "end" not in kwargs, "Cannot use 'end' in adjust and kwargs"
247
- end = min(d.dates[-1] for d in datasets).astype(object)
255
+ end = max(common).astype(object)
248
256
  for i, d in enumerate(datasets):
249
257
  if end != d.dates[-1]:
250
258
  subset_kwargs[i]["end"] = end
@@ -262,6 +270,7 @@ def _auto_adjust(datasets, kwargs):
262
270
 
263
271
 
264
272
  def _open_dataset(*args, **kwargs):
273
+
265
274
  sets = []
266
275
  for a in args:
267
276
  sets.append(_open(a))
@@ -302,6 +311,12 @@ def _open_dataset(*args, **kwargs):
302
311
  assert not sets, sets
303
312
  return concat_factory(args, kwargs).mutate()
304
313
 
314
+ if "merge" in kwargs:
315
+ from .merge import merge_factory
316
+
317
+ assert not sets, sets
318
+ return merge_factory(args, kwargs).mutate()
319
+
305
320
  if "ensemble" in kwargs:
306
321
  from .ensemble import ensemble_factory
307
322
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -76,6 +79,10 @@ class Select(Forwards):
76
79
  def variables(self):
77
80
  return [self.dataset.variables[i] for i in self.indices]
78
81
 
82
+ @cached_property
83
+ def variables_metadata(self):
84
+ return {k: v for k, v in self.dataset.variables_metadata.items() if k in self.variables}
85
+
79
86
  @cached_property
80
87
  def name_to_index(self):
81
88
  return {k: i for i, k in enumerate(self.variables)}
@@ -108,13 +115,20 @@ class Rename(Forwards):
108
115
  super().__init__(dataset)
109
116
  for n in rename:
110
117
  assert n in dataset.variables, n
118
+
111
119
  self._variables = [rename.get(v, v) for v in dataset.variables]
120
+ self._variables_metadata = {rename.get(k, k): v for k, v in dataset.variables_metadata.items()}
121
+
112
122
  self.rename = rename
113
123
 
114
124
  @property
115
125
  def variables(self):
116
126
  return self._variables
117
127
 
128
+ @property
129
+ def variables_metadata(self):
130
+ return self._variables_metadata
131
+
118
132
  @cached_property
119
133
  def name_to_index(self):
120
134
  return {k: i for i, k in enumerate(self.variables)}
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -69,7 +71,7 @@ class S3Store(ReadOnlyStore):
69
71
  """
70
72
 
71
73
  def __init__(self, url, region=None):
72
- from anemoi.utils.s3 import s3_client
74
+ from anemoi.utils.remote.s3 import s3_client
73
75
 
74
76
  _, _, self.bucket, self.key = url.split("/", 3)
75
77
  self.s3 = s3_client(self.bucket, region=region)
@@ -83,6 +85,41 @@ class S3Store(ReadOnlyStore):
83
85
  return response["Body"].read()
84
86
 
85
87
 
88
+ class PlanetaryComputerStore(ReadOnlyStore):
89
+ """We write our own Store to access catalogs on Planetary Computer,
90
+ as it requires some extra arguements to use xr.open_zarr.
91
+ """
92
+
93
+ def __init__(self, data_catalog_id):
94
+ self.data_catalog_id = data_catalog_id
95
+
96
+ def __getitem__(self):
97
+ import planetary_computer
98
+ import pystac_client
99
+
100
+ catalog = pystac_client.Client.open(
101
+ "https://planetarycomputer.microsoft.com/api/stac/v1/",
102
+ modifier=planetary_computer.sign_inplace,
103
+ )
104
+ collection = catalog.get_collection(self.data_catalog_id)
105
+
106
+ asset = collection.assets["zarr-abfs"]
107
+
108
+ if "xarray:storage_options" in asset.extra_fields:
109
+ store = {
110
+ "store": asset.href,
111
+ "storage_options": asset.extra_fields["xarray:storage_options"],
112
+ **asset.extra_fields["xarray:open_kwargs"],
113
+ }
114
+ else:
115
+ store = {
116
+ "filename_or_obj": asset.href,
117
+ **asset.extra_fields["xarray:open_kwargs"],
118
+ }
119
+
120
+ return store
121
+
122
+
86
123
  class DebugStore(ReadOnlyStore):
87
124
  """A store to debug the zarr loading."""
88
125
 
@@ -119,6 +156,9 @@ def name_to_zarr_store(path_or_url):
119
156
  if len(bits) == 5 and (bits[1], bits[3], bits[4]) == ("s3", "amazonaws", "com"):
120
157
  s3_url = f"s3://{bits[0]}{parsed.path}"
121
158
  store = S3Store(s3_url, region=bits[2])
159
+ elif store.startswith("https://planetarycomputer.microsoft.com/"):
160
+ data_catalog_id = store.rsplit("/", 1)[-1]
161
+ store = PlanetaryComputerStore(data_catalog_id).__getitem__()
122
162
  else:
123
163
  store = HTTPStore(store)
124
164
 
@@ -302,6 +342,17 @@ class Zarr(Dataset):
302
342
  )
303
343
  ]
304
344
 
345
+ @cached_property
346
+ def constant_fields(self):
347
+ result = self.z.attrs.get("constant_fields")
348
+ if result is None:
349
+ LOG.warning("No 'constant_fields' attribute in %r, computing them", self)
350
+ return self.computed_constant_fields()
351
+
352
+ @property
353
+ def variables_metadata(self):
354
+ return self.z.attrs.get("variables_metadata", {})
355
+
305
356
  def __repr__(self):
306
357
  return self.path
307
358
 
@@ -313,6 +364,7 @@ class Zarr(Dataset):
313
364
  attrs=dict(self.z.attrs),
314
365
  chunks=self.chunks,
315
366
  dtype=str(self.dtype),
367
+ path=self.path,
316
368
  )
317
369
 
318
370
  def source(self, index):
@@ -331,6 +383,12 @@ class Zarr(Dataset):
331
383
  name, _ = os.path.splitext(os.path.basename(self.path))
332
384
  names.add(name)
333
385
 
386
+ def collect_supporting_arrays(self, collected, *path):
387
+ pass
388
+
389
+ def collect_input_sources(self, collected):
390
+ pass
391
+
334
392
 
335
393
  class ZarrWithMissingDates(Zarr):
336
394
  """A zarr dataset with missing dates."""
@@ -339,7 +397,7 @@ class ZarrWithMissingDates(Zarr):
339
397
  super().__init__(path)
340
398
 
341
399
  missing_dates = self.z.attrs.get("missing_dates", [])
342
- missing_dates = set([np.datetime64(x) for x in missing_dates])
400
+ missing_dates = set([np.datetime64(x, "s") for x in missing_dates])
343
401
  self.missing_to_dates = {i: d for i, d in enumerate(self.dates) if d in missing_dates}
344
402
  self.missing = set(self.missing_to_dates)
345
403
 
@@ -392,6 +450,9 @@ class ZarrWithMissingDates(Zarr):
392
450
  return "zarr*"
393
451
 
394
452
 
453
+ QUIET = set()
454
+
455
+
395
456
  def zarr_lookup(name, fail=True):
396
457
 
397
458
  if name.endswith(".zarr") or name.endswith(".zip"):
@@ -400,6 +461,9 @@ def zarr_lookup(name, fail=True):
400
461
  config = load_config()["datasets"]
401
462
 
402
463
  if name in config["named"]:
464
+ if name not in QUIET:
465
+ LOG.info("Opening `%s` as `%s`", name, config["named"][name])
466
+ QUIET.add(name)
403
467
  return config["named"][name]
404
468
 
405
469
  tried = []
@@ -413,6 +477,9 @@ def zarr_lookup(name, fail=True):
413
477
  if z is not None:
414
478
  # Cache for next time
415
479
  config["named"][name] = full
480
+ if name not in QUIET:
481
+ LOG.info("Opening `%s` as `%s`", name, full)
482
+ QUIET.add(name)
416
483
  return full
417
484
  except zarr.errors.PathNotFoundError:
418
485
  pass
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -132,6 +135,8 @@ class Subset(Forwards):
132
135
  @cached_property
133
136
  def frequency(self):
134
137
  dates = self.dates
138
+ if len(dates) < 2:
139
+ raise ValueError(f"Cannot determine frequency of a subset with less than two dates ({self.dates}).")
135
140
  return frequency_to_timedelta(dates[1].astype(object) - dates[0].astype(object))
136
141
 
137
142
  def source(self, index):
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
  from functools import wraps
@@ -91,6 +94,11 @@ class Unchecked(Combined):
91
94
  def variables(self):
92
95
  raise NotImplementedError()
93
96
 
97
+ @property
98
+ @check("check_same_variables")
99
+ def variables_metadata(self):
100
+ raise NotImplementedError()
101
+
94
102
  @property
95
103
  @check("check_same_variables")
96
104
  def statistics(self):
@@ -1,10 +1,13 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  from functools import cached_property
10
13
 
@@ -18,15 +21,19 @@ LOG = logging.getLogger(__name__)
18
21
 
19
22
  class ZipBase(Combined):
20
23
 
24
+ def __init__(self, datasets, check_compatibility=True):
25
+ self._check_compatibility = check_compatibility
26
+ super().__init__(datasets)
27
+
21
28
  def swap_with_parent(self, parent):
22
29
  new_parents = [parent.clone(ds) for ds in self.datasets]
23
30
  return self.clone(new_parents)
24
31
 
25
32
  def clone(self, datasets):
26
- return self.__class__(datasets)
33
+ return self.__class__(datasets, check_compatibility=self._check_compatibility)
27
34
 
28
35
  def tree(self):
29
- return Node(self, [d.tree() for d in self.datasets])
36
+ return Node(self, [d.tree() for d in self.datasets], check_compatibility=self._check_compatibility)
30
37
 
31
38
  def __len__(self):
32
39
  return min(len(d) for d in self.datasets)
@@ -86,6 +93,10 @@ class ZipBase(Combined):
86
93
  def name_to_index(self):
87
94
  return tuple(d.name_to_index for d in self.datasets)
88
95
 
96
+ def check_compatibility(self, d1, d2):
97
+ if self._check_compatibility:
98
+ super().check_compatibility(d1, d2)
99
+
89
100
 
90
101
  class Zip(ZipBase):
91
102
  pass
@@ -110,7 +121,9 @@ def xy_factory(args, kwargs):
110
121
 
111
122
  assert len(datasets) == 2
112
123
 
113
- return XY(datasets)._subset(**kwargs)
124
+ check_compatibility = kwargs.pop("check_compatibility", True)
125
+
126
+ return XY(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
114
127
 
115
128
 
116
129
  def zip_factory(args, kwargs):
@@ -122,4 +135,6 @@ def zip_factory(args, kwargs):
122
135
  datasets = [_open(e) for e in zip]
123
136
  datasets, kwargs = _auto_adjust(datasets, kwargs)
124
137
 
125
- return Zip(datasets)._subset(**kwargs)
138
+ check_compatibility = kwargs.pop("check_compatibility", True)
139
+
140
+ return Zip(datasets, check_compatibility=check_compatibility)._subset(**kwargs)
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -8,6 +10,8 @@
8
10
 
9
11
  import datetime
10
12
  import warnings
13
+ from functools import reduce
14
+ from math import gcd
11
15
 
12
16
  # from anemoi.utils.dates import as_datetime
13
17
  from anemoi.utils.dates import DateTimes
@@ -193,18 +197,16 @@ class HindcastsDates(DatesProvider):
193
197
 
194
198
  dates = sorted(dates)
195
199
 
196
- mindelta = None
200
+ deltas = set()
197
201
  for a, b in zip(dates, dates[1:]):
198
202
  delta = b - a
199
203
  assert isinstance(delta, datetime.timedelta), delta
200
- if mindelta is None:
201
- mindelta = delta
202
- else:
203
- mindelta = min(mindelta, delta)
204
+ deltas.add(delta)
204
205
 
206
+ mindelta_seconds = reduce(gcd, [int(delta.total_seconds()) for delta in deltas])
207
+ mindelta = datetime.timedelta(seconds=mindelta_seconds)
205
208
  self.frequency = mindelta
206
209
  assert mindelta.total_seconds() > 0, mindelta
207
-
208
210
  print("🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥", dates[0], dates[-1], mindelta)
209
211
 
210
212
  # Use all values between start and end by frequency, and set the ones that are missing
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
anemoi/datasets/fields.py CHANGED
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
anemoi/datasets/grids.py CHANGED
@@ -1,12 +1,14 @@
1
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
2
  #
3
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
4
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
5
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
6
7
  # granted to it by virtue of its status as an intergovernmental organisation
7
8
  # nor does it submit to any jurisdiction.
8
- #
9
9
 
10
+
11
+ import base64
10
12
  import logging
11
13
 
12
14
  import numpy as np
@@ -295,6 +297,88 @@ def thinning_mask(
295
297
  return np.array([i for i in indices])
296
298
 
297
299
 
300
+ def outline(lats, lons, neighbours=5):
301
+ from scipy.spatial import KDTree
302
+
303
+ xyx = latlon_to_xyz(lats, lons)
304
+ grid_points = np.array(xyx).transpose()
305
+
306
+ # Use a KDTree to find the nearest points
307
+ _, indices = KDTree(grid_points).query(grid_points, k=neighbours)
308
+
309
+ # Centre of the Earth
310
+ zero = np.array([0.0, 0.0, 0.0])
311
+
312
+ outside = []
313
+
314
+ for i, (point, index) in enumerate(zip(grid_points, indices)):
315
+ inside = False
316
+ for j in range(1, neighbours):
317
+ t = Triangle3D(
318
+ grid_points[index[j]],
319
+ grid_points[index[(j + 1) % neighbours]],
320
+ grid_points[index[(j + 2) % neighbours]],
321
+ )
322
+ inside = t.intersect(zero, point)
323
+ if inside:
324
+ break
325
+
326
+ if not inside:
327
+ outside.append(i)
328
+
329
+ return outside
330
+
331
+
332
+ def deserialise_mask(encoded):
333
+ import pickle
334
+ import zlib
335
+
336
+ packed = pickle.loads(zlib.decompress(base64.b64decode(encoded)))
337
+
338
+ mask = []
339
+ value = False
340
+ for count in packed:
341
+ mask.extend([value] * count)
342
+ value = not value
343
+ return np.array(mask, dtype=bool)
344
+
345
+
346
+ def _serialise_mask(mask):
347
+ import pickle
348
+ import zlib
349
+
350
+ assert len(mask.shape) == 1
351
+ assert len(mask)
352
+
353
+ packed = []
354
+ last = mask[0]
355
+ count = 1
356
+
357
+ for value in mask[1:]:
358
+ if value == last:
359
+ count += 1
360
+ else:
361
+ packed.append(count)
362
+ last = value
363
+ count = 1
364
+
365
+ packed.append(count)
366
+
367
+ # We always start with an 'off' value
368
+ # So if the first value is 'on', we need to add a zero
369
+ if mask[0]:
370
+ packed.insert(0, 0)
371
+
372
+ return base64.b64encode(zlib.compress(pickle.dumps(packed))).decode("utf-8")
373
+
374
+
375
+ def serialise_mask(mask):
376
+ result = _serialise_mask(mask)
377
+ # Make sure we can deserialise it
378
+ assert np.all(mask == deserialise_mask(result))
379
+ return result
380
+
381
+
298
382
  if __name__ == "__main__":
299
383
  global_lats, global_lons = np.meshgrid(
300
384
  np.linspace(90, -90, 90),
@@ -0,0 +1,60 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
10
+
11
+ # A collection of functions to support pytest testing
12
+
13
+ import logging
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+
18
+ def assert_field_list(fs, size=None, start=None, end=None, constant=False, skip=None):
19
+ import numpy as np
20
+
21
+ if size is None:
22
+ assert len(fs) > 0, fs
23
+ else:
24
+ assert len(fs) == size, (len(fs), size)
25
+
26
+ first = fs[0]
27
+ last = fs[-1]
28
+
29
+ if constant:
30
+ # TODO: add a check for constant fields
31
+ pass
32
+ else:
33
+ assert start is None or first.metadata("valid_datetime") == start, (first.metadata("valid_datetime"), start)
34
+ assert end is None or last.metadata("valid_datetime") == end, (last.metadata("valid_datetime"), end)
35
+ print(first.datetime())
36
+
37
+ print(last.metadata())
38
+
39
+ first = first
40
+ latitudes, longitudes = first.grid_points()
41
+
42
+ assert len(latitudes.shape) == 1, latitudes.shape
43
+ assert len(longitudes.shape) == 1, longitudes.shape
44
+
45
+ assert len(latitudes) == len(longitudes), (len(latitudes), len(longitudes))
46
+ data = first.to_numpy(flatten=True)
47
+
48
+ assert len(data) == len(latitudes), (len(data), len(latitudes))
49
+
50
+ north = np.max(latitudes)
51
+ south = np.min(latitudes)
52
+ east = np.max(longitudes)
53
+ west = np.min(longitudes)
54
+
55
+ assert north >= south, (north, south)
56
+ assert east >= west, (east, west)
57
+ assert north <= 90, north
58
+ assert south >= -90, south
59
+ assert east <= 360, east
60
+ assert west >= -180, west
@@ -0,0 +1,8 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
@@ -1,11 +1,11 @@
1
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
2
  #
3
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
4
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
5
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
6
7
  # granted to it by virtue of its status as an intergovernmental organisation
7
8
  # nor does it submit to any jurisdiction.
8
- #
9
9
 
10
10
 
11
11
  class WrappedField: