anemoi-datasets 0.3.10__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/commands/compare.py +59 -0
- anemoi/datasets/commands/create.py +84 -3
- anemoi/datasets/commands/inspect.py +9 -9
- anemoi/datasets/commands/scan.py +4 -4
- anemoi/datasets/compute/recentre.py +14 -9
- anemoi/datasets/create/__init__.py +44 -17
- anemoi/datasets/create/check.py +6 -5
- anemoi/datasets/create/chunks.py +1 -1
- anemoi/datasets/create/config.py +6 -27
- anemoi/datasets/create/functions/__init__.py +3 -3
- anemoi/datasets/create/functions/filters/empty.py +4 -4
- anemoi/datasets/create/functions/filters/rename.py +14 -6
- anemoi/datasets/create/functions/filters/rotate_winds.py +16 -60
- anemoi/datasets/create/functions/filters/unrotate_winds.py +14 -64
- anemoi/datasets/create/functions/sources/__init__.py +39 -0
- anemoi/datasets/create/functions/sources/accumulations.py +38 -56
- anemoi/datasets/create/functions/sources/constants.py +11 -4
- anemoi/datasets/create/functions/sources/empty.py +2 -2
- anemoi/datasets/create/functions/sources/forcings.py +3 -3
- anemoi/datasets/create/functions/sources/grib.py +8 -4
- anemoi/datasets/create/functions/sources/hindcasts.py +32 -364
- anemoi/datasets/create/functions/sources/mars.py +57 -26
- anemoi/datasets/create/functions/sources/netcdf.py +2 -60
- anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi/datasets/create/functions/sources/source.py +3 -3
- anemoi/datasets/create/functions/sources/tendencies.py +7 -7
- anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
- anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- anemoi/datasets/create/input.py +309 -191
- anemoi/datasets/create/loaders.py +155 -77
- anemoi/datasets/create/patch.py +17 -14
- anemoi/datasets/create/persistent.py +1 -1
- anemoi/datasets/create/size.py +4 -5
- anemoi/datasets/create/statistics/__init__.py +51 -17
- anemoi/datasets/create/template.py +11 -61
- anemoi/datasets/create/trace.py +91 -0
- anemoi/datasets/create/utils.py +5 -52
- anemoi/datasets/create/zarr.py +24 -10
- anemoi/datasets/data/dataset.py +4 -4
- anemoi/datasets/data/misc.py +9 -37
- anemoi/datasets/data/stores.py +37 -14
- anemoi/datasets/dates/__init__.py +7 -1
- anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +24 -8
- anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
- anemoi_datasets-0.3.10.dist-info/RECORD +0 -73
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
CHANGED

anemoi/datasets/commands/compare.py
CHANGED

@@ -8,6 +8,10 @@
 # nor does it submit to any jurisdiction.
 #
 
+import numpy as np
+import tqdm
+import zarr
+
 from anemoi.datasets import open_dataset
 
 from . import Command

@@ -19,6 +23,8 @@ class Compare(Command):
     def add_arguments(self, command_parser):
         command_parser.add_argument("dataset1")
         command_parser.add_argument("dataset2")
+        command_parser.add_argument("--data", action="store_true", help="Compare the data.")
+        command_parser.add_argument("--statistics", action="store_true", help="Compare the statistics.")
 
     def run(self, args):
         ds1 = open_dataset(args.dataset1)

@@ -42,5 +48,58 @@
                 f"{ds2.statistics['mean'][ds2.name_to_index[v]]:14g}",
             )
 
+        if args.data:
+            print()
+            print("Data:")
+            print("-----")
+            print()
+
+            diff = 0
+            for a, b in tqdm.tqdm(zip(ds1, ds2)):
+                if not np.array_equal(a, b, equal_nan=True):
+                    diff += 1
+
+            print(f"Number of different rows: {diff}/{len(ds1)}")
+
+        if args.data:
+            print()
+            print("Data 2:")
+            print("-----")
+            print()
+
+            ds1 = zarr.open(args.dataset1, mode="r")
+            ds2 = zarr.open(args.dataset2, mode="r")
+
+            for name in (
+                "data",
+                "count",
+                "sums",
+                "squares",
+                "mean",
+                "stdev",
+                "minimum",
+                "maximum",
+                "latitudes",
+                "longitudes",
+            ):
+                a1 = ds1[name]
+                a2 = ds2[name]
+
+                if len(a1) != len(a2):
+                    print(f"{name}: lengths mismatch {len(a1)} != {len(a2)}")
+                    continue
+
+                diff = 0
+                for a, b in tqdm.tqdm(zip(a1, a2), leave=False):
+                    if not np.array_equal(a, b, equal_nan=True):
+                        if diff == 0:
+                            print(f"\n{name}: first different row:")
+                            print(a[a != b])
+                            print(b[a != b])
+
+                        diff += 1
+
+                print(f"{name}: {diff} different rows out of {len(a1)}")
+
 
 command = Compare
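The row comparison above hinges on NumPy's `equal_nan` option; a minimal standalone sketch of why that matters (illustrative, not part of the package):

    import numpy as np

    a = np.array([1.0, np.nan, 3.0])
    b = np.array([1.0, np.nan, 3.0])

    # equal_nan=True treats NaNs at the same positions as equal, so rows
    # that differ only in missing values are not counted as different.
    assert np.array_equal(a, b, equal_nan=True)
    assert not np.array_equal(a, b)  # plain comparison: NaN != NaN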
anemoi/datasets/commands/create.py
CHANGED

@@ -1,7 +1,39 @@
-…
+import datetime
+import logging
+import time
+from concurrent.futures import ProcessPoolExecutor
+from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import as_completed
+
+import tqdm
+from anemoi.utils.humanize import seconds_to_human
+
+from anemoi.datasets.create.trace import enable_trace
 
 from . import Command
 
+LOG = logging.getLogger(__name__)
+
+
+def task(what, options, *args, **kwargs):
+    """
+    Make sure `import Creator` is done in the sub-processes, and not in the main one.
+    """
+
+    now = datetime.datetime.now()
+    LOG.debug(f"Task {what}({args},{kwargs}) starting")
+
+    from anemoi.datasets.create import Creator
+
+    if "trace" in options:
+        enable_trace(options["trace"])
+
+    c = Creator(**options)
+    result = getattr(c, what)(*args, **kwargs)
+
+    LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
+    return result
+
 
 class Create(Command):
     """Create a dataset."""

@@ -22,12 +54,61 @@ class Create(Command):
         )
         command_parser.add_argument("config", help="Configuration yaml file defining the recipe to create the dataset.")
         command_parser.add_argument("path", help="Path to store the created data.")
+        group = command_parser.add_mutually_exclusive_group()
+        group.add_argument("--threads", help="Use `n` parallel thread workers.", type=int, default=0)
+        group.add_argument("--processes", help="Use `n` parallel process workers.", type=int, default=0)
+        command_parser.add_argument("--trace", action="store_true")
 
     def run(self, args):
-…
+        now = time.time()
+        if args.threads + args.processes:
+            self.parallel_create(args)
+        else:
+            self.serial_create(args)
+        LOG.info(f"Create completed in {seconds_to_human(time.time()-now)}")
 
-…
+    def serial_create(self, args):
+        from anemoi.datasets.create import Creator
+
+        options = vars(args)
+        c = Creator(**options)
         c.create()
 
+    def parallel_create(self, args):
+        """Some modules, like fsspec, do not work well with fork().
+        Other modules may not be thread safe. So we implement
+        parallel loading using multiprocessing before any
+        of the modules are imported.
+        """
+
+        options = vars(args)
+        parallel = args.threads + args.processes
+        args.use_threads = args.threads > 0
+
+        if args.use_threads:
+            ExecutorClass = ThreadPoolExecutor
+        else:
+            ExecutorClass = ProcessPoolExecutor
+
+        with ExecutorClass(max_workers=1) as executor:
+            total = executor.submit(task, "init", options).result()
+
+        futures = []
+
+        with ExecutorClass(max_workers=parallel) as executor:
+            for n in range(total):
+                futures.append(executor.submit(task, "load", options, parts=f"{n+1}/{total}"))
+
+            for future in tqdm.tqdm(
+                as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
+            ):
+                future.result()
+
+        with ExecutorClass(max_workers=1) as executor:
+            executor.submit(task, "statistics", options).result()
+            executor.submit(task, "additions", options).result()
+            executor.submit(task, "cleanup", options).result()
+            executor.submit(task, "verify", options).result()
+
 
 command = Create
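The `task` wrapper exists so that heavyweight, fork-unfriendly imports happen inside the worker processes rather than in the parent. A minimal standalone sketch of that pattern (names here are illustrative, not from the package):

    from concurrent.futures import ProcessPoolExecutor


    def worker(n):
        # The import runs in the child process only, so the parent never
        # loads modules that misbehave after fork().
        import math

        return math.sqrt(n)


    if __name__ == "__main__":
        with ProcessPoolExecutor(max_workers=2) as executor:
            futures = [executor.submit(worker, n) for n in range(4)]
            print([f.result() for f in futures])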
anemoi/datasets/commands/inspect.py
CHANGED

@@ -16,7 +16,7 @@ import numpy as np
 import semantic_version
 import tqdm
 from anemoi.utils.humanize import bytes
-from anemoi.utils.humanize import …
+from anemoi.utils.humanize import bytes_to_human
 from anemoi.utils.humanize import when
 from anemoi.utils.text import dotted_line
 from anemoi.utils.text import progress

@@ -215,9 +215,9 @@ class Version:
         total_size, n = compute_directory_size(self.path)
 
         if total_size is not None:
-            print(f"💽 Size : {bytes(total_size)} ({…
+            print(f"💽 Size : {bytes(total_size)} ({bytes_to_human(total_size)})")
         if n is not None:
-            print(f"📁 Files : {…
+            print(f"📁 Files : {n:,}")
 
     @property
     def statistics(self):

@@ -382,7 +382,7 @@ class NoVersion(Version):
     @property
     def last_date(self):
         monthly = find(self.metadata, "monthly")
-        time = max([int(t) for t in find(self.metadata["…
+        time = max([int(t) for t in find(self.metadata["earthkit-data"], "time")])
         assert isinstance(time, int), (time, type(time))
         if time > 100:
             time = time // 100

@@ -390,7 +390,7 @@
 
     @property
     def frequency(self):
-        time = find(self.metadata["…
+        time = find(self.metadata["earthkit-data"], "time")
         return 24 // len(time)
 
     @property

@@ -444,9 +444,9 @@ class Version0_4(Version):
         z = self.zarr
 
         # for backward compatibility
-        if "…
-…
-        print(f"…
+        if "earthkit-data" in z.attrs:
+            ekd_version = z.attrs["earthkit-data"].get("versions", {}).get("earthkit-data", "unknown")
+            print(f"earthkit-data version used to create this zarr: {ekd_version}. Not supported.")
             return
 
         version = z.attrs.get("version")

@@ -455,7 +455,7 @@
             print(" Cannot find metadata information about versions.")
         else:
             print(f"Zarr format (version {version})", end="")
-            print(f" created by …
+            print(f" created by earthkit-data={versions.pop('earthkit-data')}", end="")
             timestamp = z.attrs.get("creation_timestamp")
             timestamp = datetime.datetime.fromisoformat(timestamp)
             print(f" on {timestamp}", end="")
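The inspect command now renders sizes with `bytes_to_human` from anemoi-utils; a quick sketch of the kind of output it produces (the exact formatting depends on the anemoi-utils version installed):

    from anemoi.utils.humanize import bytes_to_human

    # Renders a raw byte count as a human-readable string, e.g. "1.5 GiB"
    print(bytes_to_human(1_600_000_000))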
anemoi/datasets/commands/scan.py
CHANGED

@@ -3,7 +3,7 @@ import os
 import sys
 from collections import defaultdict
 
-import …
+import earthkit.data as ekd
 import tqdm
 import yaml

@@ -50,9 +50,9 @@ class Scan(Command):
         for path in tqdm.tqdm(paths, leave=False):
             if not match(path):
                 continue
-            for field in tqdm.tqdm(…
-                dates.add(field.…
-                mars = field.…
+            for field in tqdm.tqdm(ekd.from_source("file", path), leave=False):
+                dates.add(field.datetime()["valid_time"])
+                mars = field.metadata(namespace="mars")
                 keys = tuple(mars.get(k) for k in KEYS)
                 gribs[keys].add(path)
                 for k, v in mars.items():
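The scan loop uses the earthkit-data field API that this release migrates to; a sketch of the access pattern, with a placeholder GRIB path:

    import earthkit.data as ekd

    fields = ekd.from_source("file", "/path/to/data.grib")  # placeholder path
    for field in fields:
        print(field.datetime()["valid_time"])       # validity datetime of the field
        print(field.metadata(namespace="mars"))     # dict of MARS-namespace keys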
anemoi/datasets/compute/recentre.py
CHANGED

@@ -10,10 +10,10 @@
 import logging
 
 import numpy as np
-from …
-from …
+from earthkit.data.core.temporary import temp_file
+from earthkit.data.readers.grib.output import new_grib_output
 
-from anemoi.datasets.create.functions import …
+from anemoi.datasets.create.functions import assert_is_fieldlist
 
 LOG = logging.getLogger(__name__)

@@ -96,7 +96,7 @@ def recentre(
 
     for i, centre_field in enumerate(centre):
         param = centre_field.metadata("param")
-        centre_field_as_mars = centre_field.…
+        centre_field_as_mars = centre_field.metadata(namespace="mars")
 
         # load the centre field
         centre_np = centre_field.to_numpy()

@@ -106,8 +106,13 @@ def recentre(
 
         for j in range(n_numbers):
             ensemble_field = members[i * n_numbers + j]
-            ensemble_field_as_mars = ensemble_field.…
-            check_compatible(…
+            ensemble_field_as_mars = ensemble_field.metadata(namespace="mars")
+            check_compatible(
+                centre_field,
+                ensemble_field,
+                centre_field_as_mars,
+                ensemble_field_as_mars,
+            )
             members_np[j] = ensemble_field.to_numpy()
 
         ensemble_field_as_mars = tuple(sorted(ensemble_field_as_mars.items()))

@@ -149,10 +154,10 @@ def recentre(
     if output is not None:
         return path
 
-    from …
+    from earthkit.data import from_source
 
-    ds = …
-…
+    ds = from_source("file", path)
+    assert_is_fieldlist(ds)
     # save a reference to the tmp file so it is deleted
     # only when the dataset is not used anymore
     ds._tmp = tmp
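Conceptually, recentring shifts each ensemble member so that the ensemble mean coincides with a new centre field; a minimal NumPy sketch of the idea (illustrative only, not the package's exact code):

    import numpy as np

    members = np.random.rand(4, 10)   # 4 ensemble members, 10 grid points
    centre = np.random.rand(10)       # the new centre field

    # Remove the current ensemble mean and add the centre back in.
    recentred = members - members.mean(axis=0) + centre
    assert np.allclose(recentred.mean(axis=0), centre)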
anemoi/datasets/create/__init__.py
CHANGED

@@ -7,8 +7,15 @@
 # nor does it submit to any jurisdiction.
 #
 
+import logging
 import os
 
+LOG = logging.getLogger(__name__)
+
+
+def _ignore(*args, **kwargs):
+    pass
+
 
 class Creator:
     def __init__(

@@ -16,19 +23,21 @@ class Creator:
         path,
         config=None,
         cache=None,
-…
+        use_threads=False,
         statistics_tmp=None,
         overwrite=False,
         test=None,
+        progress=None,
         **kwargs,
     ):
         self.path = path  # Output path
         self.config = config
         self.cache = cache
-        self.…
+        self.use_threads = use_threads
         self.statistics_tmp = statistics_tmp
         self.overwrite = overwrite
         self.test = test
+        self.progress = progress if progress is not None else _ignore
 
     def init(self, check_name=False):
         # check path

@@ -44,10 +53,11 @@ class Creator:
             path=self.path,
             config=self.config,
             statistics_tmp=self.statistics_tmp,
-…
+            use_threads=self.use_threads,
+            progress=self.progress,
             test=self.test,
         )
-        obj.initialise(check_name=check_name)
+        return obj.initialise(check_name=check_name)
 
     def load(self, parts=None):
         from .loaders import ContentLoader

@@ -56,7 +66,8 @@
         loader = ContentLoader.from_dataset_config(
             path=self.path,
             statistics_tmp=self.statistics_tmp,
-…
+            use_threads=self.use_threads,
+            progress=self.progress,
             parts=parts,
         )
         loader.load()

@@ -66,7 +77,8 @@
 
         loader = StatisticsAdder.from_dataset(
             path=self.path,
-…
+            use_threads=self.use_threads,
+            progress=self.progress,
             statistics_tmp=self.statistics_tmp,
             statistics_output=output,
             recompute=False,

@@ -74,20 +86,22 @@
             statistics_end=end,
         )
         loader.run()
+        assert loader.ready()
 
     def size(self):
        from .loaders import DatasetHandler
        from .size import compute_directory_sizes
 
        metadata = compute_directory_sizes(self.path)
-        handle = DatasetHandler.from_dataset(path=self.path, …
+        handle = DatasetHandler.from_dataset(path=self.path, use_threads=self.use_threads)
        handle.update_metadata(**metadata)
+        assert handle.ready()
 
     def cleanup(self):
         from .loaders import DatasetHandlerWithStatistics
 
         cleaner = DatasetHandlerWithStatistics.from_dataset(
-            path=self.path, …
+            path=self.path, use_threads=self.use_threads, progress=self.progress, statistics_tmp=self.statistics_tmp
         )
         cleaner.tmp_statistics.delete()
         cleaner.registry.clean()

@@ -103,15 +117,17 @@
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
         if statistics:
-            a = StatisticsAddition.from_dataset(path=self.path, …
+            a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
             a.initialise()
 
         for d in delta:
             try:
-                a = TendenciesStatisticsAddition.from_dataset(…
+                a = TendenciesStatisticsAddition.from_dataset(
+                    path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
+                )
                 a.initialise()
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
-…
+                LOG.info(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
     def run_additions(self, parts=None, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition

@@ -119,15 +135,17 @@
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
         if statistics:
-            a = StatisticsAddition.from_dataset(path=self.path, …
+            a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
             a.run(parts)
 
         for d in delta:
             try:
-                a = TendenciesStatisticsAddition.from_dataset(…
+                a = TendenciesStatisticsAddition.from_dataset(
+                    path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
+                )
                 a.run(parts)
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
-…
+                LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
     def finalise_additions(self, delta=[1, 3, 6, 12, 24], statistics=True):
         from .loaders import StatisticsAddition

@@ -135,15 +153,17 @@
         from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
 
         if statistics:
-            a = StatisticsAddition.from_dataset(path=self.path, …
+            a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
             a.finalise()
 
         for d in delta:
             try:
-                a = TendenciesStatisticsAddition.from_dataset(…
+                a = TendenciesStatisticsAddition.from_dataset(
+                    path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
+                )
                 a.finalise()
             except TendenciesStatisticsDeltaNotMultipleOfFrequency:
-…
+                LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
 
     def finalise(self, **kwargs):
         self.statistics(**kwargs)

@@ -174,3 +194,10 @@
             return True
         except zarr.errors.PathNotFoundError:
             return False
+
+    def verify(self):
+        from .loaders import DatasetVerifier
+
+        handle = DatasetVerifier.from_dataset(path=self.path, use_threads=self.use_threads)
+
+        handle.verify()
anemoi/datasets/create/check.py
CHANGED

@@ -56,7 +56,7 @@ class DatasetName:
         raise ValueError(self.error_message)
 
     def _parse(self, name):
-        pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?(…
+        pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
         match = re.match(pattern, name)
 
         assert match, (name, pattern)

@@ -136,18 +136,19 @@ class StatisticsValueError(ValueError):
     pass
 
 
-def check_data_values(arr, *, name: str, log=[], …
-    if allow_nan is False:
-        allow_nan = lambda x: False  # noqa: E731
+def check_data_values(arr, *, name: str, log=[], allow_nans=False):
 
-    if …
+    if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
         arr = arr[~np.isnan(arr)]
 
+    assert arr.size > 0, (name, *log)
+
     min, max = arr.min(), arr.max()
     assert not (np.isnan(arr).any()), (name, min, max, *log)
 
     if min == 9999.0:
         warnings.warn(f"Min value 9999 for {name}")
+
     if max == 9999.0:
         warnings.warn(f"Max value 9999 for {name}")
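The reworked `allow_nans` argument accepts a boolean or a collection of variable names; a standalone sketch of the presumably intended dispatch (the helper name is ours, not the package's):

    def nans_allowed(name, allow_nans):
        # Collections whitelist specific variables; booleans apply globally.
        if isinstance(allow_nans, (set, list, tuple, dict)):
            return name in allow_nans
        return bool(allow_nans)

    assert nans_allowed("2t", allow_nans={"2t", "tp"})
    assert not nans_allowed("z", allow_nans={"2t", "tp"})
    assert nans_allowed("z", allow_nans=True)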
anemoi/datasets/create/chunks.py
CHANGED

anemoi/datasets/create/config.py
CHANGED

@@ -12,9 +12,9 @@ import os
 from copy import deepcopy
 
 import yaml
-from …
-…
-from .…
+from anemoi.utils.config import DotDict
+from anemoi.utils.config import load_any_dict_format
+from earthkit.data.core.order import normalize_order_by
 
 LOG = logging.getLogger(__name__)

@@ -43,31 +43,10 @@ def check_dict_value_and_set(dic, key, value):
         if dic[key] == value:
             return
         raise ValueError(f"Cannot use {key}={dic[key]}. Must use {value}.")
-…
+    LOG.info(f"Setting {key}={value} in config")
     dic[key] = value
 
 
-class DictObj(dict):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        for key, value in self.items():
-            if isinstance(value, dict):
-                self[key] = DictObj(value)
-                continue
-            if isinstance(value, list):
-                self[key] = [DictObj(item) if isinstance(item, dict) else item for item in value]
-                continue
-
-    def __getattr__(self, attr):
-        try:
-            return self[attr]
-        except KeyError:
-            raise AttributeError(attr)
-
-    def __setattr__(self, attr, value):
-        self[attr] = value
-
-
 def resolve_includes(config):
     if isinstance(config, list):
         return [resolve_includes(c) for c in config]

@@ -79,11 +58,11 @@ def resolve_includes(config):
     return config
 
 
-class Config(…
+class Config(DotDict):
     def __init__(self, config=None, **kwargs):
         if isinstance(config, str):
             self.config_path = os.path.realpath(config)
-            config = …
+            config = load_any_dict_format(config)
         else:
             config = deepcopy(config if config is not None else {})
         config = resolve_includes(config)
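Config now inherits from anemoi-utils' DotDict instead of the deleted local DictObj; a sketch of the attribute-style access it provides (assuming DotDict mirrors the removed class's behaviour):

    from anemoi.utils.config import DotDict

    cfg = DotDict({"dates": {"start": "2020-01-01"}})
    assert cfg.dates.start == "2020-01-01"   # nested dicts readable as attributes
    cfg.dates.end = "2020-12-31"             # attribute writes update the dict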
anemoi/datasets/create/functions/__init__.py
CHANGED

@@ -13,10 +13,10 @@ import importlib
 import entrypoints
 
 
-def …
-    from …
+def assert_is_fieldlist(obj):
+    from earthkit.data.indexing.fieldlist import FieldList
 
-    assert isinstance(obj, …
+    assert isinstance(obj, FieldList), type(obj)
 
 
 def import_function(name, kind):
anemoi/datasets/create/functions/filters/empty.py
CHANGED

@@ -7,10 +7,10 @@
 # nor does it submit to any jurisdiction.
 #
 
-import …
+import earthkit.data as ekd
 
 
 def execute(context, input, **kwargs):
-    # …
-    # So we can reference an earlier step in a function like '…
-    return …
+    # Useful to create a pipeline that returns an empty result
+    # So we can reference an earlier step in a function like 'constants'
+    return ekd.from_source("empty")
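The empty filter returns earthkit-data's built-in "empty" source; a quick sketch of what that yields:

    import earthkit.data as ekd

    fs = ekd.from_source("empty")
    # An empty field list: iterating yields nothing and len() is 0.
    assert len(fs) == 0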
anemoi/datasets/create/functions/filters/rename.py
CHANGED

@@ -9,7 +9,7 @@
 
 import re
 
-from …
+from earthkit.data.indexing.fieldlist import FieldArray
 
 
 class RenamedFieldMapping:

@@ -26,15 +26,23 @@ class RenamedFieldMapping:
         self.what = what
         self.renaming = renaming
 
-    def metadata(self, key):
-…
+    def metadata(self, key=None, **kwargs):
+        if key is None:
+            return self.field.metadata(**kwargs)
+
+        value = self.field.metadata(key, **kwargs)
         if key == self.what:
             return self.renaming.get(value, value)
+
         return value
 
     def __getattr__(self, name):
         return getattr(self.field, name)
 
+    def __repr__(self) -> str:
+        return repr(self.field)
+        return f"{self.field} -> {self.what} -> {self.renaming}"
+
 
 class RenamedFieldFormat:
     """Rename a field based on a format string.

@@ -48,10 +56,10 @@ class RenamedFieldFormat:
         self.format = format
         self.bits = re.findall(r"{(\w+)}", format)
 
-    def metadata(self, key):
-        value = self.field.metadata(key)
+    def metadata(self, key, **kwargs):
+        value = self.field.metadata(key, **kwargs)
         if "{" + key + "}" in self.format:
-            bits = {b: self.field.metadata(b) for b in self.bits}
+            bits = {b: self.field.metadata(b, **kwargs) for b in self.bits}
             return self.format.format(**bits)
         return value
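RenamedFieldMapping is a thin proxy that intercepts one metadata key and delegates everything else; a standalone sketch of the pattern with a stand-in field class (FakeField and Renamed are ours, not the package's):

    class FakeField:
        # Hypothetical stand-in for an earthkit-data field
        def metadata(self, key, **kwargs):
            return {"param": "2t"}[key]

    class Renamed:
        # Same proxy idea as RenamedFieldMapping above.
        def __init__(self, field, what, renaming):
            self.field, self.what, self.renaming = field, what, renaming

        def metadata(self, key=None, **kwargs):
            value = self.field.metadata(key, **kwargs)
            if key == self.what:
                return self.renaming.get(value, value)
            return value

        def __getattr__(self, name):
            return getattr(self.field, name)

    f = Renamed(FakeField(), "param", {"2t": "t2m"})
    assert f.metadata("param") == "t2m"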