anemoi-datasets 0.5.6__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. anemoi/datasets/__init__.py +11 -3
  2. anemoi/datasets/__main__.py +2 -3
  3. anemoi/datasets/_version.py +2 -2
  4. anemoi/datasets/commands/__init__.py +2 -3
  5. anemoi/datasets/commands/cleanup.py +9 -0
  6. anemoi/datasets/commands/compare.py +3 -3
  7. anemoi/datasets/commands/copy.py +38 -68
  8. anemoi/datasets/commands/create.py +20 -5
  9. anemoi/datasets/commands/finalise-additions.py +9 -0
  10. anemoi/datasets/commands/finalise.py +9 -0
  11. anemoi/datasets/commands/init-additions.py +9 -0
  12. anemoi/datasets/commands/init.py +9 -0
  13. anemoi/datasets/commands/inspect.py +7 -1
  14. anemoi/datasets/commands/load-additions.py +9 -0
  15. anemoi/datasets/commands/load.py +9 -0
  16. anemoi/datasets/commands/patch.py +9 -0
  17. anemoi/datasets/commands/publish.py +9 -0
  18. anemoi/datasets/commands/scan.py +9 -0
  19. anemoi/datasets/compute/__init__.py +8 -0
  20. anemoi/datasets/compute/recentre.py +3 -2
  21. anemoi/datasets/create/__init__.py +64 -48
  22. anemoi/datasets/create/check.py +4 -3
  23. anemoi/datasets/create/chunks.py +3 -2
  24. anemoi/datasets/create/config.py +5 -5
  25. anemoi/datasets/create/functions/__init__.py +22 -7
  26. anemoi/datasets/create/functions/filters/__init__.py +2 -1
  27. anemoi/datasets/create/functions/filters/empty.py +3 -2
  28. anemoi/datasets/create/functions/filters/noop.py +2 -2
  29. anemoi/datasets/create/functions/filters/pressure_level_relative_humidity_to_specific_humidity.py +3 -2
  30. anemoi/datasets/create/functions/filters/pressure_level_specific_humidity_to_relative_humidity.py +3 -2
  31. anemoi/datasets/create/functions/filters/rename.py +16 -10
  32. anemoi/datasets/create/functions/filters/rotate_winds.py +3 -2
  33. anemoi/datasets/create/functions/filters/single_level_dewpoint_to_relative_humidity.py +3 -2
  34. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_dewpoint.py +3 -2
  35. anemoi/datasets/create/functions/filters/single_level_relative_humidity_to_specific_humidity.py +2 -2
  36. anemoi/datasets/create/functions/filters/single_level_specific_humidity_to_relative_humidity.py +2 -2
  37. anemoi/datasets/create/functions/filters/speeddir_to_uv.py +3 -2
  38. anemoi/datasets/create/functions/filters/unrotate_winds.py +3 -2
  39. anemoi/datasets/create/functions/filters/uv_to_speeddir.py +3 -2
  40. anemoi/datasets/create/functions/sources/__init__.py +2 -2
  41. anemoi/datasets/create/functions/sources/accumulations.py +10 -4
  42. anemoi/datasets/create/functions/sources/constants.py +3 -2
  43. anemoi/datasets/create/functions/sources/empty.py +3 -2
  44. anemoi/datasets/create/functions/sources/forcings.py +3 -2
  45. anemoi/datasets/create/functions/sources/grib.py +2 -2
  46. anemoi/datasets/create/functions/sources/hindcasts.py +3 -2
  47. anemoi/datasets/create/functions/sources/mars.py +97 -17
  48. anemoi/datasets/create/functions/sources/netcdf.py +3 -2
  49. anemoi/datasets/create/functions/sources/opendap.py +2 -2
  50. anemoi/datasets/create/functions/sources/recentre.py +3 -2
  51. anemoi/datasets/create/functions/sources/source.py +3 -2
  52. anemoi/datasets/create/functions/sources/tendencies.py +3 -2
  53. anemoi/datasets/create/functions/sources/xarray/__init__.py +8 -2
  54. anemoi/datasets/create/functions/sources/xarray/coordinates.py +5 -2
  55. anemoi/datasets/create/functions/sources/xarray/field.py +3 -2
  56. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +12 -2
  57. anemoi/datasets/create/functions/sources/xarray/flavour.py +21 -16
  58. anemoi/datasets/create/functions/sources/xarray/grid.py +3 -2
  59. anemoi/datasets/create/functions/sources/xarray/metadata.py +3 -2
  60. anemoi/datasets/create/functions/sources/xarray/time.py +39 -4
  61. anemoi/datasets/create/functions/sources/xarray/variable.py +6 -6
  62. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +2 -2
  63. anemoi/datasets/create/functions/sources/xarray_zarr.py +2 -2
  64. anemoi/datasets/create/functions/sources/zenodo.py +2 -2
  65. anemoi/datasets/create/input/__init__.py +3 -17
  66. anemoi/datasets/create/input/action.py +3 -2
  67. anemoi/datasets/create/input/concat.py +3 -2
  68. anemoi/datasets/create/input/context.py +3 -2
  69. anemoi/datasets/create/input/data_sources.py +3 -2
  70. anemoi/datasets/create/input/empty.py +3 -2
  71. anemoi/datasets/create/input/filter.py +3 -2
  72. anemoi/datasets/create/input/function.py +3 -2
  73. anemoi/datasets/create/input/join.py +3 -2
  74. anemoi/datasets/create/input/misc.py +3 -2
  75. anemoi/datasets/create/input/pipe.py +3 -2
  76. anemoi/datasets/create/input/repeated_dates.py +3 -2
  77. anemoi/datasets/create/input/result.py +187 -3
  78. anemoi/datasets/create/input/step.py +4 -2
  79. anemoi/datasets/create/input/template.py +3 -2
  80. anemoi/datasets/create/input/trace.py +3 -2
  81. anemoi/datasets/create/patch.py +9 -1
  82. anemoi/datasets/create/persistent.py +7 -3
  83. anemoi/datasets/create/size.py +3 -2
  84. anemoi/datasets/create/statistics/__init__.py +7 -3
  85. anemoi/datasets/create/statistics/summary.py +3 -2
  86. anemoi/datasets/create/utils.py +15 -2
  87. anemoi/datasets/create/writer.py +3 -2
  88. anemoi/datasets/create/zarr.py +8 -3
  89. anemoi/datasets/data/__init__.py +27 -1
  90. anemoi/datasets/data/concat.py +5 -1
  91. anemoi/datasets/data/dataset.py +216 -37
  92. anemoi/datasets/data/debug.py +4 -1
  93. anemoi/datasets/data/ensemble.py +4 -1
  94. anemoi/datasets/data/fill_missing.py +165 -0
  95. anemoi/datasets/data/forwards.py +27 -2
  96. anemoi/datasets/data/grids.py +236 -58
  97. anemoi/datasets/data/indexing.py +4 -1
  98. anemoi/datasets/data/interpolate.py +4 -1
  99. anemoi/datasets/data/join.py +17 -1
  100. anemoi/datasets/data/masked.py +36 -10
  101. anemoi/datasets/data/merge.py +180 -0
  102. anemoi/datasets/data/misc.py +18 -3
  103. anemoi/datasets/data/missing.py +4 -1
  104. anemoi/datasets/data/rescale.py +4 -1
  105. anemoi/datasets/data/select.py +15 -1
  106. anemoi/datasets/data/statistics.py +4 -1
  107. anemoi/datasets/data/stores.py +70 -3
  108. anemoi/datasets/data/subset.py +6 -1
  109. anemoi/datasets/data/unchecked.py +9 -1
  110. anemoi/datasets/data/xy.py +20 -5
  111. anemoi/datasets/dates/__init__.py +9 -7
  112. anemoi/datasets/dates/groups.py +3 -1
  113. anemoi/datasets/fields.py +3 -1
  114. anemoi/datasets/grids.py +86 -2
  115. anemoi/datasets/testing.py +60 -0
  116. anemoi/datasets/utils/__init__.py +8 -0
  117. anemoi/datasets/utils/fields.py +2 -2
  118. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/METADATA +11 -29
  119. anemoi_datasets-0.5.10.dist-info/RECORD +124 -0
  120. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/WHEEL +1 -1
  121. anemoi_datasets-0.5.6.dist-info/RECORD +0 -121
  122. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/LICENSE +0 -0
  123. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/entry_points.txt +0 -0
  124. {anemoi_datasets-0.5.6.dist-info → anemoi_datasets-0.5.10.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,27 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
8
- from ._version import __version__
9
10
  from .data import MissingDateError
10
11
  from .data import add_dataset_path
11
12
  from .data import add_named_dataset
12
13
  from .data import list_dataset_names
13
14
  from .data import open_dataset
14
15
 
16
+ try:
17
+ # NOTE: the `_version.py` file must not be present in the git repository
18
+ # as it is generated by setuptools at install time
19
+ from ._version import __version__ # type: ignore
20
+ except ImportError: # pragma: no cover
21
+ # Local copy or not installed with setuptools
22
+ __version__ = "999"
23
+
15
24
  __all__ = [
16
- "__version__",
17
25
  "add_dataset_path",
18
26
  "add_named_dataset",
19
27
  "list_dataset_names",
@@ -1,12 +1,11 @@
1
- #!/usr/bin/env python
2
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
3
2
  #
4
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
5
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
- #
10
9
 
11
10
  from anemoi.utils.cli import cli_main
12
11
  from anemoi.utils.cli import make_parser
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.5.6'
16
- __version_tuple__ = version_tuple = (0, 5, 6)
15
+ __version__ = version = '0.5.10'
16
+ __version_tuple__ = version_tuple = (0, 5, 10)
@@ -1,12 +1,11 @@
1
- #!/usr/bin/env python
2
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
3
2
  #
4
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
5
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
- #
10
9
 
11
10
  import os
12
11
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,12 +1,12 @@
1
- #!/usr/bin/env python
2
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
3
2
  #
4
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
5
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
7
7
  # granted to it by virtue of its status as an intergovernmental organisation
8
8
  # nor does it submit to any jurisdiction.
9
- #
9
+
10
10
 
11
11
  import numpy as np
12
12
  import tqdm
@@ -1,20 +1,22 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
7
9
 
10
+
8
11
  import logging
9
12
  import os
10
- import shutil
11
13
  import sys
12
14
  from concurrent.futures import ThreadPoolExecutor
13
15
  from concurrent.futures import as_completed
14
16
 
15
17
  import tqdm
16
- from anemoi.utils.s3 import download
17
- from anemoi.utils.s3 import upload
18
+ from anemoi.utils.remote import Transfer
19
+ from anemoi.utils.remote import TransferMethodNotImplementedError
18
20
 
19
21
  from . import Command
20
22
 
@@ -26,54 +28,7 @@ except AttributeError:
26
28
  isatty = False
27
29
 
28
30
 
29
- class S3Downloader:
30
- def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
31
- self.source = source
32
- self.target = target
33
- self.transfers = transfers
34
- self.overwrite = overwrite
35
- self.resume = resume
36
- self.verbosity = verbosity
37
-
38
- def run(self):
39
- if self.target == ".":
40
- self.target = os.path.basename(self.source)
41
-
42
- if self.overwrite and os.path.exists(self.target):
43
- LOG.info(f"Deleting {self.target}")
44
- shutil.rmtree(self.target)
45
-
46
- download(
47
- self.source + "/" if not self.source.endswith("/") else self.source,
48
- self.target,
49
- overwrite=self.overwrite,
50
- resume=self.resume,
51
- verbosity=self.verbosity,
52
- threads=self.transfers,
53
- )
54
-
55
-
56
- class S3Uploader:
57
- def __init__(self, source, target, transfers, overwrite, resume, verbosity, **kwargs):
58
- self.source = source
59
- self.target = target
60
- self.transfers = transfers
61
- self.overwrite = overwrite
62
- self.resume = resume
63
- self.verbosity = verbosity
64
-
65
- def run(self):
66
- upload(
67
- self.source,
68
- self.target,
69
- overwrite=self.overwrite,
70
- resume=self.resume,
71
- verbosity=self.verbosity,
72
- threads=self.transfers,
73
- )
74
-
75
-
76
- class DefaultCopier:
31
+ class ZarrCopier:
77
32
  def __init__(self, source, target, transfers, block_size, overwrite, resume, verbosity, nested, rechunk, **kwargs):
78
33
  self.source = source
79
34
  self.target = target
@@ -87,6 +42,14 @@ class DefaultCopier:
87
42
 
88
43
  self.rechunking = rechunk.split(",") if rechunk else []
89
44
 
45
+ source_is_ssh = self.source.startswith("ssh://")
46
+ target_is_ssh = self.target.startswith("ssh://")
47
+
48
+ if source_is_ssh or target_is_ssh:
49
+ if self.rechunk:
50
+ raise NotImplementedError("Rechunking with SSH not implemented.")
51
+ assert NotImplementedError("SSH not implemented.")
52
+
90
53
  def _store(self, path, nested=False):
91
54
  if nested:
92
55
  import zarr
@@ -334,26 +297,33 @@ class CopyMixin:
334
297
  if args.source == args.target:
335
298
  raise ValueError("Source and target are the same.")
336
299
 
337
- kwargs = vars(args)
338
-
339
300
  if args.overwrite and args.resume:
340
301
  raise ValueError("Cannot use --overwrite and --resume together.")
341
302
 
342
- source_in_s3 = args.source.startswith("s3://")
343
- target_in_s3 = args.target.startswith("s3://")
344
-
345
- copier = None
346
-
347
- if args.rechunk or (source_in_s3 and target_in_s3):
348
- copier = DefaultCopier(**kwargs)
349
- else:
350
- if source_in_s3:
351
- copier = S3Downloader(**kwargs)
352
-
353
- if target_in_s3:
354
- copier = S3Uploader(**kwargs)
355
-
303
+ if not args.rechunk:
304
+ # rechunking is only supported for ZARR datasets, it is implemented in this package
305
+ try:
306
+ if args.source.startswith("s3://") and not args.source.endswith("/"):
307
+ args.source = args.source + "/"
308
+ copier = Transfer(
309
+ args.source,
310
+ args.target,
311
+ overwrite=args.overwrite,
312
+ resume=args.resume,
313
+ verbosity=args.verbosity,
314
+ threads=args.transfers,
315
+ )
316
+ copier.run()
317
+ return
318
+ except TransferMethodNotImplementedError:
319
+ # DataTransfer relies on anemoi-utils which is agnostic to the source and target format
320
+ # it transfers file and folders, ignoring that it is zarr data
321
+ # if it is not implemented, we fallback to the ZarrCopier
322
+ pass
323
+
324
+ copier = ZarrCopier(**vars(args))
356
325
  copier.run()
326
+ return
357
327
 
358
328
 
359
329
  class Copy(CopyMixin, Command):
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import datetime
2
11
  import logging
3
12
  import time
@@ -14,9 +23,7 @@ LOG = logging.getLogger(__name__)
14
23
 
15
24
 
16
25
  def task(what, options, *args, **kwargs):
17
- """
18
- Make sure `import Creator` is done in the sub-processes, and not in the main one.
19
- """
26
+ """Make sure `import Creator` is done in the sub-processes, and not in the main one."""
20
27
 
21
28
  now = datetime.datetime.now()
22
29
  LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")
@@ -117,7 +124,9 @@ class Create(Command):
117
124
  opt["parts"] = f"{n+1}/{total}"
118
125
  futures.append(executor.submit(task, "load", opt))
119
126
 
120
- for future in tqdm.tqdm(as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1): # fmt: skip
127
+ for future in tqdm.tqdm(
128
+ as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
129
+ ):
121
130
  future.result()
122
131
 
123
132
  with ExecutorClass(max_workers=1) as executor:
@@ -133,7 +142,13 @@ class Create(Command):
133
142
  for n in range(total):
134
143
  futures.append(executor.submit(task, "load-additions", opt))
135
144
 
136
- for future in tqdm.tqdm(as_completed(futures), desc="Computing additions", total=len(futures), colour="green", position=parallel + 1): # fmt: skip
145
+ for future in tqdm.tqdm(
146
+ as_completed(futures),
147
+ desc="Computing additions",
148
+ total=len(futures),
149
+ colour="green",
150
+ position=parallel + 1,
151
+ ):
137
152
  future.result()
138
153
 
139
154
  with ExecutorClass(max_workers=1) as executor:
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,6 +1,8 @@
1
- # (C) Copyright 2023 European Centre for Medium-Range Weather Forecasts.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
2
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
3
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
4
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
5
7
  # granted to it by virtue of its status as an intergovernmental organisation
6
8
  # nor does it submit to any jurisdiction.
@@ -506,6 +508,10 @@ class Version0_6(Version):
506
508
  def variables(self):
507
509
  return self.metadata["variables"]
508
510
 
511
+ @property
512
+ def variables_metadata(self):
513
+ return self.metadata.get("variables_metadata", {})
514
+
509
515
 
510
516
  class Version0_12(Version0_6):
511
517
  def details(self):
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
  import time
3
12
 
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import logging
2
11
 
3
12
  from . import Command
@@ -1,3 +1,12 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+
1
10
  import fnmatch
2
11
  import os
3
12
  import sys
@@ -0,0 +1,8 @@
1
+ # (C) Copyright 2024 Anemoi contributors.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
@@ -1,11 +1,12 @@
1
- # (C) Copyright 2024 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
2
  #
3
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
4
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
5
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
6
7
  # granted to it by virtue of its status as an intergovernmental organisation
7
8
  # nor does it submit to any jurisdiction.
8
- #
9
+
9
10
 
10
11
  import logging
11
12
 
@@ -1,11 +1,11 @@
1
- # (C) Copyright 2023 ECMWF.
1
+ # (C) Copyright 2024 Anemoi contributors.
2
2
  #
3
3
  # This software is licensed under the terms of the Apache Licence Version 2.0
4
4
  # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ #
5
6
  # In applying this licence, ECMWF does not waive the privileges and immunities
6
7
  # granted to it by virtue of its status as an intergovernmental organisation
7
8
  # nor does it submit to any jurisdiction.
8
- #
9
9
 
10
10
  import datetime
11
11
  import json
@@ -14,9 +14,9 @@ import os
14
14
  import time
15
15
  import uuid
16
16
  import warnings
17
- from copy import deepcopy
18
17
  from functools import cached_property
19
18
 
19
+ import cftime
20
20
  import numpy as np
21
21
  import tqdm
22
22
  from anemoi.utils.config import DotDict as DotDict
@@ -25,6 +25,7 @@ from anemoi.utils.dates import frequency_to_string
25
25
  from anemoi.utils.dates import frequency_to_timedelta
26
26
  from anemoi.utils.humanize import compress_dates
27
27
  from anemoi.utils.humanize import seconds_to_human
28
+ from anemoi.utils.sanitise import sanitise
28
29
  from earthkit.data.core.order import build_remapping
29
30
 
30
31
  from anemoi.datasets import MissingDateError
@@ -52,7 +53,7 @@ from .writer import ViewCacheArray
52
53
 
53
54
  LOG = logging.getLogger(__name__)
54
55
 
55
- VERSION = "0.20"
56
+ VERSION = "0.30"
56
57
 
57
58
 
58
59
  def json_tidy(o):
@@ -66,6 +67,19 @@ def json_tidy(o):
66
67
  if isinstance(o, datetime.timedelta):
67
68
  return frequency_to_string(o)
68
69
 
70
+ if isinstance(o, cftime.DatetimeJulian):
71
+ import pandas as pd
72
+
73
+ o = pd.Timestamp(
74
+ o.year,
75
+ o.month,
76
+ o.day,
77
+ o.hour,
78
+ o.minute,
79
+ o.second,
80
+ )
81
+ return o.isoformat()
82
+
69
83
  raise TypeError(repr(o) + " is not JSON serializable")
70
84
 
71
85
 
@@ -93,10 +107,6 @@ def build_statistics_dates(dates, start, end):
93
107
  return (start.isoformat(), end.isoformat())
94
108
 
95
109
 
96
- def _ignore(*args, **kwargs):
97
- pass
98
-
99
-
100
110
  def _path_readable(path):
101
111
  import zarr
102
112
 
@@ -277,6 +287,16 @@ class Size(Actor):
277
287
  metadata = compute_directory_sizes(self.path)
278
288
  self.update_metadata(**metadata)
279
289
 
290
+ # Look for constant fields
291
+ ds = open_dataset(self.path)
292
+ constants = ds.computed_constant_fields()
293
+
294
+ variables_metadata = self.dataset.zarr_metadata.get("variables_metadata", {}).copy()
295
+ for k in constants:
296
+ variables_metadata[k]["constant_in_time"] = True
297
+
298
+ self.update_metadata(constant_fields=constants, variables_metadata=variables_metadata)
299
+
280
300
 
281
301
  class HasRegistryMixin:
282
302
  @cached_property
@@ -325,46 +345,22 @@ def build_input_(main_config, output_config):
325
345
  return builder
326
346
 
327
347
 
328
- def tidy_recipe(config: object):
329
- """Remove potentially private information in the config"""
330
- config = deepcopy(config)
331
- if isinstance(config, (tuple, list)):
332
- return [tidy_recipe(_) for _ in config]
333
- if isinstance(config, (dict, DotDict)):
334
- for k, v in config.items():
335
- if k.startswith("_"):
336
- config[k] = "*** REMOVED FOR SECURITY ***"
337
- else:
338
- config[k] = tidy_recipe(v)
339
- if isinstance(config, str):
340
- if config.startswith("_"):
341
- return "*** REMOVED FOR SECURITY ***"
342
- if config.startswith("s3://"):
343
- return "*** REMOVED FOR SECURITY ***"
344
- if config.startswith("gs://"):
345
- return "*** REMOVED FOR SECURITY ***"
346
- if config.startswith("http"):
347
- return "*** REMOVED FOR SECURITY ***"
348
- if config.startswith("ftp"):
349
- return "*** REMOVED FOR SECURITY ***"
350
- if config.startswith("file"):
351
- return "*** REMOVED FOR SECURITY ***"
352
- if config.startswith("ssh"):
353
- return "*** REMOVED FOR SECURITY ***"
354
- if config.startswith("scp"):
355
- return "*** REMOVED FOR SECURITY ***"
356
- if config.startswith("rsync"):
357
- return "*** REMOVED FOR SECURITY ***"
358
- if config.startswith("/"):
359
- return "*** REMOVED FOR SECURITY ***"
360
- if "@" in config:
361
- return "*** REMOVED FOR SECURITY ***"
362
- return config
363
-
364
-
365
348
  class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
366
349
  dataset_class = NewDataset
367
- def __init__(self, path, config, check_name=False, overwrite=False, use_threads=False, statistics_temp_dir=None, progress=None, test=False, cache=None, **kwargs): # fmt: skip
350
+
351
+ def __init__(
352
+ self,
353
+ path,
354
+ config,
355
+ check_name=False,
356
+ overwrite=False,
357
+ use_threads=False,
358
+ statistics_temp_dir=None,
359
+ progress=None,
360
+ test=False,
361
+ cache=None,
362
+ **kwargs,
363
+ ):
368
364
  if _path_readable(path) and not overwrite:
369
365
  raise Exception(f"{path} already exists. Use overwrite=True to overwrite.")
370
366
 
@@ -448,7 +444,24 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
448
444
  metadata.update(self.main_config.get("add_metadata", {}))
449
445
 
450
446
  metadata["_create_yaml_config"] = self.main_config.get_serialisable_dict()
451
- metadata["recipe"] = tidy_recipe(self.main_config.get_serialisable_dict())
447
+
448
+ recipe = sanitise(self.main_config.get_serialisable_dict())
449
+
450
+ # Remove stuff added by prepml
451
+ for k in [
452
+ "build_dataset",
453
+ "config_format_version",
454
+ "config_path",
455
+ "dataset_status",
456
+ "ecflow",
457
+ "metadata",
458
+ "platform",
459
+ "reading_chunks",
460
+ "upload",
461
+ ]:
462
+ recipe.pop(k, None)
463
+
464
+ metadata["recipe"] = recipe
452
465
 
453
466
  metadata["description"] = self.main_config.description
454
467
  metadata["licence"] = self.main_config["licence"]
@@ -467,6 +480,7 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
467
480
  metadata["data_request"] = self.minimal_input.data_request
468
481
  metadata["field_shape"] = self.minimal_input.field_shape
469
482
  metadata["proj_string"] = self.minimal_input.proj_string
483
+ metadata["variables_metadata"] = self.minimal_input.variables_metadata
470
484
 
471
485
  metadata["start_date"] = dates[0].isoformat()
472
486
  metadata["end_date"] = dates[-1].isoformat()
@@ -531,7 +545,9 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
531
545
 
532
546
 
533
547
  class Load(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixin):
534
- def __init__(self, path, parts=None, use_threads=False, statistics_temp_dir=None, progress=None, cache=None, **kwargs): # fmt: skip
548
+ def __init__(
549
+ self, path, parts=None, use_threads=False, statistics_temp_dir=None, progress=None, cache=None, **kwargs
550
+ ):
535
551
  super().__init__(path, cache=cache)
536
552
  self.use_threads = use_threads
537
553
  self.statistics_temp_dir = statistics_temp_dir