anemoi-datasets 0.5.28__py3-none-any.whl → 0.5.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anemoi/datasets/_version.py +2 -2
- anemoi/datasets/create/__init__.py +4 -12
- anemoi/datasets/create/config.py +50 -53
- anemoi/datasets/create/input/result/field.py +1 -3
- anemoi/datasets/create/sources/accumulate.py +517 -0
- anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
- anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
- anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +153 -0
- anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
- anemoi/datasets/create/sources/grib_index.py +79 -51
- anemoi/datasets/create/sources/mars.py +56 -27
- anemoi/datasets/create/sources/xarray_support/__init__.py +1 -0
- anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
- anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
- anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
- anemoi/datasets/data/complement.py +26 -17
- anemoi/datasets/data/dataset.py +6 -0
- anemoi/datasets/data/masked.py +74 -13
- anemoi/datasets/data/missing.py +5 -0
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/METADATA +8 -7
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/RECORD +25 -23
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/WHEEL +1 -1
- anemoi/datasets/create/sources/accumulations.py +0 -1042
- anemoi/datasets/create/sources/accumulations2.py +0 -618
- anemoi/datasets/create/sources/tendencies.py +0 -171
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/entry_points.txt +0 -0
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/licenses/LICENSE +0 -0
- {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/top_level.txt +0 -0
anemoi/datasets/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.5.28'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 5, 28)
|
|
31
|
+
__version__ = version = '0.5.30'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 5, 30)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
anemoi/datasets/create/__init__.py
CHANGED
|
@@ -256,8 +256,7 @@ class Dataset:
|
|
|
256
256
|
resolution: str,
|
|
257
257
|
dates: list[datetime.datetime],
|
|
258
258
|
frequency: datetime.timedelta,
|
|
259
|
-
raise_exception: bool = True,
|
|
260
|
-
is_test: bool = False,
|
|
259
|
+
raise_exception: bool = False,
|
|
261
260
|
) -> None:
|
|
262
261
|
"""Check the name of the dataset.
|
|
263
262
|
|
|
@@ -271,15 +270,13 @@ class Dataset:
|
|
|
271
270
|
The frequency of the dataset.
|
|
272
271
|
raise_exception : bool, optional
|
|
273
272
|
Whether to raise an exception if the name is invalid.
|
|
274
|
-
is_test : bool, optional
|
|
275
|
-
Whether this is a test.
|
|
276
273
|
"""
|
|
277
274
|
basename, _ = os.path.splitext(os.path.basename(self.path))
|
|
278
275
|
try:
|
|
279
276
|
DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
|
|
280
277
|
except Exception as e:
|
|
281
|
-
if raise_exception and not is_test:
|
|
282
|
-
raise e
|
|
278
|
+
if raise_exception:
|
|
279
|
+
raise
|
|
283
280
|
else:
|
|
284
281
|
LOG.warning(f"Dataset name error: {e}")
|
|
285
282
|
|
|
@@ -577,7 +574,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
577
574
|
use_threads: bool = False,
|
|
578
575
|
statistics_temp_dir: str | None = None,
|
|
579
576
|
progress: Any = None,
|
|
580
|
-
test: bool = False,
|
|
581
577
|
cache: str | None = None,
|
|
582
578
|
**kwargs: Any,
|
|
583
579
|
):
|
|
@@ -599,8 +595,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
599
595
|
The directory for temporary statistics.
|
|
600
596
|
progress : Any, optional
|
|
601
597
|
The progress indicator.
|
|
602
|
-
test : bool, optional
|
|
603
|
-
Whether this is a test.
|
|
604
598
|
cache : Optional[str], optional
|
|
605
599
|
The cache directory.
|
|
606
600
|
"""
|
|
@@ -613,9 +607,8 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
613
607
|
self.use_threads = use_threads
|
|
614
608
|
self.statistics_temp_dir = statistics_temp_dir
|
|
615
609
|
self.progress = progress
|
|
616
|
-
self.test = test
|
|
617
610
|
|
|
618
|
-
self.main_config = loader_config(config, is_test=test)
|
|
611
|
+
self.main_config = loader_config(config)
|
|
619
612
|
|
|
620
613
|
# self.registry.delete() ??
|
|
621
614
|
self.tmp_statistics.delete()
|
|
@@ -748,7 +741,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
|
|
|
748
741
|
|
|
749
742
|
self.dataset.check_name(
|
|
750
743
|
raise_exception=self.check_name,
|
|
751
|
-
is_test=self.test,
|
|
752
744
|
resolution=resolution,
|
|
753
745
|
dates=dates,
|
|
754
746
|
frequency=frequency,
|
anemoi/datasets/create/config.py
CHANGED
|
@@ -18,8 +18,6 @@ from anemoi.utils.config import DotDict
|
|
|
18
18
|
from anemoi.utils.config import load_any_dict_format
|
|
19
19
|
from earthkit.data.core.order import normalize_order_by
|
|
20
20
|
|
|
21
|
-
from anemoi.datasets.dates.groups import Groups
|
|
22
|
-
|
|
23
21
|
LOG = logging.getLogger(__name__)
|
|
24
22
|
|
|
25
23
|
|
|
@@ -340,61 +338,13 @@ def _prepare_serialisation(o: Any) -> Any:
|
|
|
340
338
|
return str(o)
|
|
341
339
|
|
|
342
340
|
|
|
343
|
-
def set_to_test_mode(cfg: dict) -> None:
|
|
344
|
-
"""Modifies the configuration to run in test mode.
|
|
345
|
-
|
|
346
|
-
Parameters
|
|
347
|
-
----------
|
|
348
|
-
cfg : dict
|
|
349
|
-
The configuration dictionary.
|
|
350
|
-
"""
|
|
351
|
-
NUMBER_OF_DATES = 4
|
|
352
|
-
|
|
353
|
-
LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
|
|
354
|
-
groups = Groups(**LoadersConfig(cfg).dates)
|
|
355
|
-
|
|
356
|
-
dates = groups.provider.values
|
|
357
|
-
cfg["dates"] = dict(
|
|
358
|
-
start=dates[0],
|
|
359
|
-
end=dates[NUMBER_OF_DATES - 1],
|
|
360
|
-
frequency=groups.provider.frequency,
|
|
361
|
-
group_by=NUMBER_OF_DATES,
|
|
362
|
-
)
|
|
363
|
-
|
|
364
|
-
def set_element_to_test(obj):
|
|
365
|
-
if isinstance(obj, (list, tuple)):
|
|
366
|
-
for v in obj:
|
|
367
|
-
set_element_to_test(v)
|
|
368
|
-
return
|
|
369
|
-
if isinstance(obj, (dict, DotDict)):
|
|
370
|
-
if "grid" in obj:
|
|
371
|
-
previous = obj["grid"]
|
|
372
|
-
obj["grid"] = "20./20."
|
|
373
|
-
LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
|
|
374
|
-
if "number" in obj:
|
|
375
|
-
if isinstance(obj["number"], (list, tuple)):
|
|
376
|
-
previous = obj["number"]
|
|
377
|
-
obj["number"] = previous[0:3]
|
|
378
|
-
LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
|
|
379
|
-
for k, v in obj.items():
|
|
380
|
-
set_element_to_test(v)
|
|
381
|
-
if "constants" in obj:
|
|
382
|
-
constants = obj["constants"]
|
|
383
|
-
if "param" in constants and isinstance(constants["param"], list):
|
|
384
|
-
constants["param"] = ["cos_latitude"]
|
|
385
|
-
|
|
386
|
-
set_element_to_test(cfg)
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
|
|
341
|
+
def loader_config(config: dict) -> LoadersConfig:
|
|
390
342
|
"""Loads and validates the configuration for dataset loaders.
|
|
391
343
|
|
|
392
344
|
Parameters
|
|
393
345
|
----------
|
|
394
346
|
config : dict
|
|
395
347
|
The configuration dictionary.
|
|
396
|
-
is_test : bool, optional
|
|
397
|
-
Whether to run in test mode. Defaults to False.
|
|
398
348
|
|
|
399
349
|
Returns
|
|
400
350
|
-------
|
|
@@ -402,8 +352,6 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
|
|
|
402
352
|
The validated configuration object.
|
|
403
353
|
"""
|
|
404
354
|
config = Config(config)
|
|
405
|
-
if is_test:
|
|
406
|
-
set_to_test_mode(config)
|
|
407
355
|
obj = LoadersConfig(config)
|
|
408
356
|
|
|
409
357
|
# yaml round trip to check that serialisation works as expected
|
|
@@ -424,6 +372,9 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
|
|
|
424
372
|
LOG.info(f"Setting env variable {k}={v}")
|
|
425
373
|
os.environ[k] = str(v)
|
|
426
374
|
|
|
375
|
+
# Used by pytest only
|
|
376
|
+
# copy.pop('checks', None)
|
|
377
|
+
|
|
427
378
|
return copy
|
|
428
379
|
|
|
429
380
|
|
|
@@ -443,3 +394,49 @@ def build_output(*args, **kwargs) -> OutputSpecs:
|
|
|
443
394
|
The output specifications object.
|
|
444
395
|
"""
|
|
445
396
|
return OutputSpecs(*args, **kwargs)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def flatten_list_of_sets(list_of_sets: list[set]) -> set:
|
|
400
|
+
return {element for subset in list_of_sets for element in subset}
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def mars_str_to_set(s: str) -> set[str]:
|
|
404
|
+
"""Mars strings are like 1/to/2 or 1/to/2/by/1
|
|
405
|
+
|
|
406
|
+
Returns a set of strings, e.g. {'1', '2'}
|
|
407
|
+
"""
|
|
408
|
+
assert "/" in s, "mars_str_to_set expects a string with '/'"
|
|
409
|
+
lst = s.split("/")
|
|
410
|
+
assert len(lst) in (3, 5), f"mars_str_to_set expects a string like 1/to/2 or 1/to/4/by/1, got {s}"
|
|
411
|
+
if len(lst) == 3:
|
|
412
|
+
assert "to" in lst
|
|
413
|
+
start, _, end = lst
|
|
414
|
+
step = 1
|
|
415
|
+
elif len(lst) == 5:
|
|
416
|
+
assert "by" in lst and "to" in lst
|
|
417
|
+
start, _, end, _, step = lst
|
|
418
|
+
return {str(i) for i in range(int(start), int(end) + 1, int(step))}
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def get_ensembles_set(obj):
|
|
422
|
+
"""Counts the number of ensembles in the configuration."""
|
|
423
|
+
if isinstance(obj, dict):
|
|
424
|
+
if "number" in obj:
|
|
425
|
+
if isinstance(obj["number"], (list, tuple)):
|
|
426
|
+
return set([str(element) for element in obj["number"]])
|
|
427
|
+
if isinstance(obj["number"], (str, int)):
|
|
428
|
+
if "/" in str(obj["number"]):
|
|
429
|
+
return mars_str_to_set(obj["number"])
|
|
430
|
+
else:
|
|
431
|
+
return {str(obj["number"])}
|
|
432
|
+
if isinstance(obj, (dict)):
|
|
433
|
+
return flatten_list_of_sets([get_ensembles_set(v) for v in obj.values()])
|
|
434
|
+
if isinstance(obj, (list, tuple)):
|
|
435
|
+
return flatten_list_of_sets([get_ensembles_set(v) for v in obj])
|
|
436
|
+
return {}
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def count_ensembles(config: Config) -> int:
|
|
440
|
+
"""Counts the number of ensembles in the configuration."""
|
|
441
|
+
ensembles = get_ensembles_set(config.input)
|
|
442
|
+
return len(ensembles) if ensembles else 1
|
anemoi/datasets/create/input/result/field.py
CHANGED
|
@@ -329,8 +329,7 @@ class FieldResult(Result):
|
|
|
329
329
|
LOG.debug(f"Sorting done in {seconds_to_human(time.time()-start)}.")
|
|
330
330
|
except ValueError:
|
|
331
331
|
self.explain(ds, order_by, remapping=remapping, patches=patches)
|
|
332
|
-
|
|
333
|
-
exit(1)
|
|
332
|
+
raise ValueError(f"Error in {self}")
|
|
334
333
|
|
|
335
334
|
if LOG.isEnabledFor(logging.DEBUG):
|
|
336
335
|
LOG.debug("Cube shape: %s", cube)
|
|
@@ -497,7 +496,6 @@ class FieldResult(Result):
|
|
|
497
496
|
print()
|
|
498
497
|
print("❌" * 40)
|
|
499
498
|
print()
|
|
500
|
-
exit(1)
|
|
501
499
|
|
|
502
500
|
def build_coords(self) -> None:
|
|
503
501
|
"""Build the coordinates for the result."""
|