anemoi-datasets 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/cleanup.py +44 -0
  3. anemoi/datasets/commands/create.py +52 -21
  4. anemoi/datasets/commands/finalise-additions.py +45 -0
  5. anemoi/datasets/commands/finalise.py +39 -0
  6. anemoi/datasets/commands/init-additions.py +45 -0
  7. anemoi/datasets/commands/init.py +67 -0
  8. anemoi/datasets/commands/inspect.py +1 -1
  9. anemoi/datasets/commands/load-additions.py +47 -0
  10. anemoi/datasets/commands/load.py +47 -0
  11. anemoi/datasets/commands/patch.py +39 -0
  12. anemoi/datasets/create/__init__.py +959 -146
  13. anemoi/datasets/create/check.py +5 -3
  14. anemoi/datasets/create/config.py +54 -2
  15. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +57 -0
  16. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +57 -0
  17. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +54 -0
  18. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +59 -0
  19. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +115 -0
  20. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +390 -0
  21. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +77 -0
  22. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +55 -0
  23. anemoi/datasets/create/functions/sources/grib.py +86 -1
  24. anemoi/datasets/create/functions/sources/hindcasts.py +14 -73
  25. anemoi/datasets/create/functions/sources/mars.py +9 -3
  26. anemoi/datasets/create/functions/sources/xarray/__init__.py +12 -2
  27. anemoi/datasets/create/functions/sources/xarray/coordinates.py +7 -0
  28. anemoi/datasets/create/functions/sources/xarray/field.py +8 -2
  29. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +0 -2
  30. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -1
  31. anemoi/datasets/create/functions/sources/xarray/metadata.py +40 -40
  32. anemoi/datasets/create/functions/sources/xarray/time.py +63 -30
  33. anemoi/datasets/create/functions/sources/xarray/variable.py +15 -38
  34. anemoi/datasets/create/input.py +62 -39
  35. anemoi/datasets/create/persistent.py +1 -1
  36. anemoi/datasets/create/statistics/__init__.py +39 -23
  37. anemoi/datasets/create/utils.py +6 -2
  38. anemoi/datasets/data/__init__.py +1 -0
  39. anemoi/datasets/data/concat.py +46 -2
  40. anemoi/datasets/data/dataset.py +119 -34
  41. anemoi/datasets/data/debug.py +5 -1
  42. anemoi/datasets/data/forwards.py +17 -8
  43. anemoi/datasets/data/grids.py +17 -3
  44. anemoi/datasets/data/interpolate.py +133 -0
  45. anemoi/datasets/data/masked.py +2 -2
  46. anemoi/datasets/data/misc.py +56 -66
  47. anemoi/datasets/data/missing.py +240 -0
  48. anemoi/datasets/data/rescale.py +147 -0
  49. anemoi/datasets/data/select.py +7 -1
  50. anemoi/datasets/data/stores.py +23 -10
  51. anemoi/datasets/data/subset.py +47 -5
  52. anemoi/datasets/data/unchecked.py +20 -22
  53. anemoi/datasets/data/xy.py +125 -0
  54. anemoi/datasets/dates/__init__.py +124 -95
  55. anemoi/datasets/dates/groups.py +85 -20
  56. anemoi/datasets/grids.py +66 -48
  57. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/METADATA +8 -17
  58. anemoi_datasets-0.5.0.dist-info/RECORD +105 -0
  59. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/WHEEL +1 -1
  60. anemoi/datasets/create/loaders.py +0 -936
  61. anemoi_datasets-0.4.4.dist-info/RECORD +0 -86
  62. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/LICENSE +0 -0
  63. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/entry_points.txt +0 -0
  64. {anemoi_datasets-0.4.4.dist-info → anemoi_datasets-0.5.0.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@ import re
12
12
  import warnings
13
13
 
14
14
  import numpy as np
15
+ from anemoi.utils.dates import frequency_to_string
15
16
 
16
17
  LOG = logging.getLogger(__name__)
17
18
 
@@ -56,10 +57,11 @@ class DatasetName:
56
57
  raise ValueError(self.error_message)
57
58
 
58
59
  def _parse(self, name):
59
- pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
60
+ pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)?$"
60
61
  match = re.match(pattern, name)
61
62
 
62
- assert match, (name, pattern)
63
+ if not match:
64
+ raise ValueError(f"the dataset name '{name}' does not follow naming convention. Does not match {pattern}")
63
65
 
64
66
  parsed = {}
65
67
  if match:
@@ -105,7 +107,7 @@ class DatasetName:
105
107
  def check_frequency(self, frequency):
106
108
  if frequency is None:
107
109
  return
108
- frequency_str = f"{frequency}h"
110
+ frequency_str = frequency_to_string(frequency)
109
111
  self._check_missing("frequency", frequency_str)
110
112
  self._check_mismatch("frequency", frequency_str)
111
113
 
@@ -16,6 +16,8 @@ from anemoi.utils.config import DotDict
16
16
  from anemoi.utils.config import load_any_dict_format
17
17
  from earthkit.data.core.order import normalize_order_by
18
18
 
19
+ from anemoi.datasets.dates.groups import Groups
20
+
19
21
  LOG = logging.getLogger(__name__)
20
22
 
21
23
 
@@ -153,6 +155,8 @@ class LoadersConfig(Config):
153
155
  raise ValueError("statistics_end is not supported anymore. Use 'statistics:end:' instead")
154
156
 
155
157
  self.setdefault("statistics", Config())
158
+ if "allow_nans" not in self.statistics:
159
+ self.statistics.allow_nans = []
156
160
 
157
161
  check_dict_value_and_set(self.output, "flatten_grid", True)
158
162
  check_dict_value_and_set(self.output, "ensemble_dimension", 2)
@@ -207,8 +211,50 @@ def _prepare_serialisation(o):
207
211
  return str(o)
208
212
 
209
213
 
210
- def loader_config(config):
214
+ def set_to_test_mode(cfg):
215
+ NUMBER_OF_DATES = 4
216
+
217
+ dates = cfg["dates"]
218
+ LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
219
+ groups = Groups(**LoadersConfig(cfg).dates)
220
+
221
+ dates = groups.dates
222
+ cfg["dates"] = dict(
223
+ start=dates[0],
224
+ end=dates[NUMBER_OF_DATES - 1],
225
+ frequency=dates.frequency,
226
+ group_by=NUMBER_OF_DATES,
227
+ )
228
+
229
+ def set_element_to_test(obj):
230
+ if isinstance(obj, (list, tuple)):
231
+ for v in obj:
232
+ set_element_to_test(v)
233
+ return
234
+ if isinstance(obj, (dict, DotDict)):
235
+ if "grid" in obj:
236
+ previous = obj["grid"]
237
+ obj["grid"] = "20./20."
238
+ LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
239
+ if "number" in obj:
240
+ if isinstance(obj["number"], (list, tuple)):
241
+ previous = obj["number"]
242
+ obj["number"] = previous[0:3]
243
+ LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
244
+ for k, v in obj.items():
245
+ set_element_to_test(v)
246
+ if "constants" in obj:
247
+ constants = obj["constants"]
248
+ if "param" in constants and isinstance(constants["param"], list):
249
+ constants["param"] = ["cos_latitude"]
250
+
251
+ set_element_to_test(cfg)
252
+
253
+
254
+ def loader_config(config, is_test=False):
211
255
  config = Config(config)
256
+ if is_test:
257
+ set_to_test_mode(config)
212
258
  obj = LoadersConfig(config)
213
259
 
214
260
  # yaml round trip to check that serialisation works as expected
@@ -216,7 +262,13 @@ def loader_config(config):
216
262
  copy = yaml.load(yaml.dump(copy), Loader=yaml.SafeLoader)
217
263
  copy = Config(copy)
218
264
  copy = LoadersConfig(config)
219
- assert yaml.dump(obj) == yaml.dump(copy), (obj, copy)
265
+
266
+ a = yaml.dump(obj)
267
+ b = yaml.dump(copy)
268
+ if a != b:
269
+ print(a)
270
+ print(b)
271
+ raise ValueError("Serialisation failed")
220
272
 
221
273
  return copy
222
274
 
@@ -0,0 +1,57 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+ from collections import defaultdict
11
+
12
+ from earthkit.data.indexing.fieldlist import FieldArray
13
+ from earthkit.meteo import thermo
14
+
15
+ from .single_level_specific_humidity_to_relative_humidity import NewDataField
16
+
17
+
18
+ def execute(context, input, t, rh, q="q"):
19
+ """Convert relative humidity on pressure levels to specific humidity"""
20
+ result = FieldArray()
21
+
22
+ params = (t, rh)
23
+ pairs = defaultdict(dict)
24
+
25
+ # Gather all necessary fields
26
+ for f in input:
27
+ key = f.metadata(namespace="mars")
28
+ param = key.pop("param")
29
+ if param in params:
30
+ key = tuple(key.items())
31
+
32
+ if param in pairs[key]:
33
+ raise ValueError(f"Duplicate field {param} for {key}")
34
+
35
+ pairs[key][param] = f
36
+ if param == t:
37
+ result.append(f)
38
+ # all other parameters
39
+ else:
40
+ result.append(f)
41
+
42
+ for keys, values in pairs.items():
43
+ # some checks
44
+
45
+ if len(values) != 2:
46
+ raise ValueError("Missing fields")
47
+
48
+ t_pl = values[t].to_numpy(flatten=True)
49
+ rh_pl = values[rh].to_numpy(flatten=True)
50
+ pressure = keys[4][1] * 100 # TODO: REMOVE HARDCODED INDICES
51
+ # print(f"Handling fields for pressure level {pressure}...")
52
+
53
+ # actual conversion from rh --> q_v
54
+ q_pl = thermo.specific_humidity_from_relative_humidity(t_pl, rh_pl, pressure)
55
+ result.append(NewDataField(values[rh], q_pl, q))
56
+
57
+ return result
@@ -0,0 +1,57 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+ from collections import defaultdict
11
+
12
+ from earthkit.data.indexing.fieldlist import FieldArray
13
+ from earthkit.meteo import thermo
14
+
15
+ from .single_level_specific_humidity_to_relative_humidity import NewDataField
16
+
17
+
18
+ def execute(context, input, t, q, rh="r"):
19
+ """Convert specific humidity on pressure levels to relative humidity"""
20
+ result = FieldArray()
21
+
22
+ params = (t, q)
23
+ pairs = defaultdict(dict)
24
+
25
+ # Gather all necessary fields
26
+ for f in input:
27
+ key = f.metadata(namespace="mars")
28
+ param = key.pop("param")
29
+ if param in params:
30
+ key = tuple(key.items())
31
+
32
+ if param in pairs[key]:
33
+ raise ValueError(f"Duplicate field {param} for {key}")
34
+
35
+ pairs[key][param] = f
36
+ if param == t:
37
+ result.append(f)
38
+ # all other parameters
39
+ else:
40
+ result.append(f)
41
+
42
+ for keys, values in pairs.items():
43
+ # some checks
44
+
45
+ if len(values) != 2:
46
+ raise ValueError("Missing fields")
47
+
48
+ t_pl = values[t].to_numpy(flatten=True)
49
+ q_pl = values[q].to_numpy(flatten=True)
50
+ pressure = keys[4][1] * 100 # TODO: REMOVE HARDCODED INDICES
51
+ # print(f"Handling fields for pressure level {pressure}...")
52
+
53
+ # actual conversion from q_v --> rh
54
+ rh_pl = thermo.relative_humidity_from_specific_humidity(t_pl, q_pl, pressure)
55
+ result.append(NewDataField(values[q], rh_pl, rh))
56
+
57
+ return result
@@ -0,0 +1,54 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+ from collections import defaultdict
11
+
12
+ from earthkit.data.indexing.fieldlist import FieldArray
13
+ from earthkit.meteo import thermo
14
+
15
+ from .single_level_specific_humidity_to_relative_humidity import NewDataField
16
+
17
+
18
+ def execute(context, input, t, td, rh="d"):
19
+ """Convert dewpoint on single levels to relative humidity"""
20
+ result = FieldArray()
21
+
22
+ params = (t, td)
23
+ pairs = defaultdict(dict)
24
+
25
+ # Gather all necessary fields
26
+ for f in input:
27
+ key = f.metadata(namespace="mars")
28
+ param = key.pop("param")
29
+ if param in params:
30
+ key = tuple(key.items())
31
+
32
+ if param in pairs[key]:
33
+ raise ValueError(f"Duplicate field {param} for {key}")
34
+
35
+ pairs[key][param] = f
36
+ if param == t:
37
+ result.append(f)
38
+ # all other parameters
39
+ else:
40
+ result.append(f)
41
+
42
+ for keys, values in pairs.items():
43
+ # some checks
44
+
45
+ if len(values) != 2:
46
+ raise ValueError("Missing fields")
47
+
48
+ t_values = values[t].to_numpy(flatten=True)
49
+ td_values = values[td].to_numpy(flatten=True)
50
+ # actual conversion from td --> rh
51
+ rh_values = thermo.relative_humidity_from_dewpoint(t=t_values, td=td_values)
52
+ result.append(NewDataField(values[td], rh_values, rh))
53
+
54
+ return result
@@ -0,0 +1,59 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+ from collections import defaultdict
11
+
12
+ from earthkit.data.indexing.fieldlist import FieldArray
13
+ from earthkit.meteo import thermo
14
+
15
+ from .single_level_specific_humidity_to_relative_humidity import NewDataField
16
+
17
+ EPS = 1.0e-4
18
+
19
+
20
+ def execute(context, input, t, rh, td="d"):
21
+ """Convert relative humidity on single levels to dewpoint"""
22
+ result = FieldArray()
23
+
24
+ params = (t, rh)
25
+ pairs = defaultdict(dict)
26
+
27
+ # Gather all necessary fields
28
+ for f in input:
29
+ key = f.metadata(namespace="mars")
30
+ param = key.pop("param")
31
+ if param in params:
32
+ key = tuple(key.items())
33
+
34
+ if param in pairs[key]:
35
+ raise ValueError(f"Duplicate field {param} for {key}")
36
+
37
+ pairs[key][param] = f
38
+ if param == t:
39
+ result.append(f)
40
+ # all other parameters
41
+ else:
42
+ result.append(f)
43
+
44
+ for keys, values in pairs.items():
45
+ # some checks
46
+
47
+ if len(values) != 2:
48
+ raise ValueError("Missing fields")
49
+
50
+ t_values = values[t].to_numpy(flatten=True)
51
+ rh_values = values[rh].to_numpy(flatten=True)
52
+ # Prevent 0 % Relative humidity which cannot be converted to dewpoint
53
+ # Seems to happen over Egypt in the CERRA dataset
54
+ rh_values[rh_values == 0] = EPS
55
+ # actual conversion from rh --> td
56
+ td_values = thermo.dewpoint_from_relative_humidity(t=t_values, r=rh_values)
57
+ result.append(NewDataField(values[rh], td_values, td))
58
+
59
+ return result
@@ -0,0 +1,115 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+
10
+
11
+ import numpy as np
12
+ from earthkit.data.indexing.fieldlist import FieldArray
13
+ from earthkit.meteo import thermo
14
+
15
+ from .single_level_specific_humidity_to_relative_humidity import AutoDict
16
+ from .single_level_specific_humidity_to_relative_humidity import NewDataField
17
+ from .single_level_specific_humidity_to_relative_humidity import pressure_at_height_level
18
+
19
+
20
+ def execute(context, input, height, t, rh, sp, new_name="2q", **kwargs):
21
+ """Convert the single (height) level relative humidity to specific humidity"""
22
+ result = FieldArray()
23
+
24
+ MANDATORY_KEYS = ["A", "B"]
25
+ OPTIONAL_KEYS = ["t_ml", "q_ml"]
26
+ MISSING_KEYS = []
27
+ DEFAULTS = dict(t_ml="t", q_ml="q")
28
+
29
+ for key in OPTIONAL_KEYS:
30
+ if key not in kwargs:
31
+ print(f"key {key} not found in yaml-file, using default key: {DEFAULTS[key]}")
32
+ kwargs[key] = DEFAULTS[key]
33
+
34
+ for key in MANDATORY_KEYS:
35
+ if key not in kwargs:
36
+ MISSING_KEYS.append(key)
37
+
38
+ if MISSING_KEYS:
39
+ raise KeyError(f"Following keys are missing: {', '.join(MISSING_KEYS)}")
40
+
41
+ single_level_params = (t, rh, sp)
42
+ model_level_params = (kwargs["t_ml"], kwargs["q_ml"])
43
+
44
+ needed_fields = AutoDict()
45
+
46
+ # Gather all necessary fields
47
+ for f in input:
48
+ key = f.metadata(namespace="mars")
49
+ param = key.pop("param")
50
+ # check single level parameters
51
+ if param in single_level_params:
52
+ levtype = key.pop("levtype")
53
+ key = tuple(key.items())
54
+
55
+ if param in needed_fields[key][levtype]:
56
+ raise ValueError(f"Duplicate single level field {param} for {key}")
57
+
58
+ needed_fields[key][levtype][param] = f
59
+ if param == rh:
60
+ if kwargs.get("keep_rh", False):
61
+ result.append(f)
62
+ else:
63
+ result.append(f)
64
+
65
+ # check model level parameters
66
+ elif param in model_level_params:
67
+ levtype = key.pop("levtype")
68
+ levelist = key.pop("levelist")
69
+ key = tuple(key.items())
70
+
71
+ if param in needed_fields[key][levtype][levelist]:
72
+ raise ValueError(f"Duplicate model level field {param} for {key} at level {levelist}")
73
+
74
+ needed_fields[key][levtype][levelist][param] = f
75
+
76
+ # all other parameters
77
+ else:
78
+ result.append(f)
79
+
80
+ for _, values in needed_fields.items():
81
+ # some checks
82
+ if len(values["sfc"]) != 3:
83
+ raise ValueError("Missing surface fields")
84
+
85
+ rh_sl = values["sfc"][rh].to_numpy(flatten=True)
86
+ t_sl = values["sfc"][t].to_numpy(flatten=True)
87
+ sp_sl = values["sfc"][sp].to_numpy(flatten=True)
88
+
89
+ nlevels = len(kwargs["A"]) - 1
90
+ if len(values["ml"]) != nlevels:
91
+ raise ValueError("Missing model levels")
92
+
93
+ for key in values["ml"].keys():
94
+ if len(values["ml"][key]) != 2:
95
+ raise ValueError(f"Missing field on level {key}")
96
+
97
+ # create 3D arrays for upper air fields
98
+ levels = list(values["ml"].keys())
99
+ levels.sort()
100
+ t_ml = []
101
+ q_ml = []
102
+ for level in levels:
103
+ t_ml.append(values["ml"][level][kwargs["t_ml"]].to_numpy(flatten=True))
104
+ q_ml.append(values["ml"][level][kwargs["q_ml"]].to_numpy(flatten=True))
105
+
106
+ t_ml = np.stack(t_ml)
107
+ q_ml = np.stack(q_ml)
108
+
109
+ # actual conversion from rh --> q_v
110
+ p_sl = pressure_at_height_level(height, q_ml, t_ml, sp_sl, np.array(kwargs["A"]), np.array(kwargs["B"]))
111
+ q_sl = thermo.specific_humidity_from_relative_humidity(t_sl, rh_sl, p_sl)
112
+
113
+ result.append(NewDataField(values["sfc"][rh], q_sl, new_name))
114
+
115
+ return result