anemoi-datasets 0.5.28__py3-none-any.whl → 0.5.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/create/__init__.py +4 -12
  3. anemoi/datasets/create/config.py +50 -53
  4. anemoi/datasets/create/input/result/field.py +1 -3
  5. anemoi/datasets/create/sources/accumulate.py +517 -0
  6. anemoi/datasets/create/sources/accumulate_utils/__init__.py +8 -0
  7. anemoi/datasets/create/sources/accumulate_utils/covering_intervals.py +221 -0
  8. anemoi/datasets/create/sources/accumulate_utils/field_to_interval.py +153 -0
  9. anemoi/datasets/create/sources/accumulate_utils/interval_generators.py +321 -0
  10. anemoi/datasets/create/sources/grib_index.py +79 -51
  11. anemoi/datasets/create/sources/mars.py +56 -27
  12. anemoi/datasets/create/sources/xarray_support/__init__.py +1 -0
  13. anemoi/datasets/create/sources/xarray_support/coordinates.py +1 -4
  14. anemoi/datasets/create/sources/xarray_support/flavour.py +2 -2
  15. anemoi/datasets/create/sources/xarray_support/patch.py +178 -5
  16. anemoi/datasets/data/complement.py +26 -17
  17. anemoi/datasets/data/dataset.py +6 -0
  18. anemoi/datasets/data/masked.py +74 -13
  19. anemoi/datasets/data/missing.py +5 -0
  20. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/METADATA +8 -7
  21. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/RECORD +25 -23
  22. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/WHEEL +1 -1
  23. anemoi/datasets/create/sources/accumulations.py +0 -1042
  24. anemoi/datasets/create/sources/accumulations2.py +0 -618
  25. anemoi/datasets/create/sources/tendencies.py +0 -171
  26. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/entry_points.txt +0 -0
  27. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/licenses/LICENSE +0 -0
  28. {anemoi_datasets-0.5.28.dist-info → anemoi_datasets-0.5.30.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.5.28'
32
- __version_tuple__ = version_tuple = (0, 5, 28)
31
+ __version__ = version = '0.5.30'
32
+ __version_tuple__ = version_tuple = (0, 5, 30)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -256,8 +256,7 @@ class Dataset:
256
256
  resolution: str,
257
257
  dates: list[datetime.datetime],
258
258
  frequency: datetime.timedelta,
259
- raise_exception: bool = True,
260
- is_test: bool = False,
259
+ raise_exception: bool = False,
261
260
  ) -> None:
262
261
  """Check the name of the dataset.
263
262
 
@@ -271,15 +270,13 @@ class Dataset:
271
270
  The frequency of the dataset.
272
271
  raise_exception : bool, optional
273
272
  Whether to raise an exception if the name is invalid.
274
- is_test : bool, optional
275
- Whether this is a test.
276
273
  """
277
274
  basename, _ = os.path.splitext(os.path.basename(self.path))
278
275
  try:
279
276
  DatasetName(basename, resolution, dates[0], dates[-1], frequency).raise_if_not_valid()
280
277
  except Exception as e:
281
- if raise_exception and not is_test:
282
- raise e
278
+ if raise_exception:
279
+ raise
283
280
  else:
284
281
  LOG.warning(f"Dataset name error: {e}")
285
282
 
@@ -577,7 +574,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
577
574
  use_threads: bool = False,
578
575
  statistics_temp_dir: str | None = None,
579
576
  progress: Any = None,
580
- test: bool = False,
581
577
  cache: str | None = None,
582
578
  **kwargs: Any,
583
579
  ):
@@ -599,8 +595,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
599
595
  The directory for temporary statistics.
600
596
  progress : Any, optional
601
597
  The progress indicator.
602
- test : bool, optional
603
- Whether this is a test.
604
598
  cache : Optional[str], optional
605
599
  The cache directory.
606
600
  """
@@ -613,9 +607,8 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
613
607
  self.use_threads = use_threads
614
608
  self.statistics_temp_dir = statistics_temp_dir
615
609
  self.progress = progress
616
- self.test = test
617
610
 
618
- self.main_config = loader_config(config, is_test=test)
611
+ self.main_config = loader_config(config)
619
612
 
620
613
  # self.registry.delete() ??
621
614
  self.tmp_statistics.delete()
@@ -748,7 +741,6 @@ class Init(Actor, HasRegistryMixin, HasStatisticTempMixin, HasElementForDataMixi
748
741
 
749
742
  self.dataset.check_name(
750
743
  raise_exception=self.check_name,
751
- is_test=self.test,
752
744
  resolution=resolution,
753
745
  dates=dates,
754
746
  frequency=frequency,
@@ -18,8 +18,6 @@ from anemoi.utils.config import DotDict
18
18
  from anemoi.utils.config import load_any_dict_format
19
19
  from earthkit.data.core.order import normalize_order_by
20
20
 
21
- from anemoi.datasets.dates.groups import Groups
22
-
23
21
  LOG = logging.getLogger(__name__)
24
22
 
25
23
 
@@ -340,61 +338,13 @@ def _prepare_serialisation(o: Any) -> Any:
340
338
  return str(o)
341
339
 
342
340
 
343
- def set_to_test_mode(cfg: dict) -> None:
344
- """Modifies the configuration to run in test mode.
345
-
346
- Parameters
347
- ----------
348
- cfg : dict
349
- The configuration dictionary.
350
- """
351
- NUMBER_OF_DATES = 4
352
-
353
- LOG.warning(f"Running in test mode. Changing the list of dates to use only {NUMBER_OF_DATES}.")
354
- groups = Groups(**LoadersConfig(cfg).dates)
355
-
356
- dates = groups.provider.values
357
- cfg["dates"] = dict(
358
- start=dates[0],
359
- end=dates[NUMBER_OF_DATES - 1],
360
- frequency=groups.provider.frequency,
361
- group_by=NUMBER_OF_DATES,
362
- )
363
-
364
- def set_element_to_test(obj):
365
- if isinstance(obj, (list, tuple)):
366
- for v in obj:
367
- set_element_to_test(v)
368
- return
369
- if isinstance(obj, (dict, DotDict)):
370
- if "grid" in obj:
371
- previous = obj["grid"]
372
- obj["grid"] = "20./20."
373
- LOG.warning(f"Running in test mode. Setting grid to {obj['grid']} instead of {previous}")
374
- if "number" in obj:
375
- if isinstance(obj["number"], (list, tuple)):
376
- previous = obj["number"]
377
- obj["number"] = previous[0:3]
378
- LOG.warning(f"Running in test mode. Setting number to {obj['number']} instead of {previous}")
379
- for k, v in obj.items():
380
- set_element_to_test(v)
381
- if "constants" in obj:
382
- constants = obj["constants"]
383
- if "param" in constants and isinstance(constants["param"], list):
384
- constants["param"] = ["cos_latitude"]
385
-
386
- set_element_to_test(cfg)
387
-
388
-
389
- def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
341
+ def loader_config(config: dict) -> LoadersConfig:
390
342
  """Loads and validates the configuration for dataset loaders.
391
343
 
392
344
  Parameters
393
345
  ----------
394
346
  config : dict
395
347
  The configuration dictionary.
396
- is_test : bool, optional
397
- Whether to run in test mode. Defaults to False.
398
348
 
399
349
  Returns
400
350
  -------
@@ -402,8 +352,6 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
402
352
  The validated configuration object.
403
353
  """
404
354
  config = Config(config)
405
- if is_test:
406
- set_to_test_mode(config)
407
355
  obj = LoadersConfig(config)
408
356
 
409
357
  # yaml round trip to check that serialisation works as expected
@@ -424,6 +372,9 @@ def loader_config(config: dict, is_test: bool = False) -> LoadersConfig:
424
372
  LOG.info(f"Setting env variable {k}={v}")
425
373
  os.environ[k] = str(v)
426
374
 
375
+ # Used by pytest only
376
+ # copy.pop('checks', None)
377
+
427
378
  return copy
428
379
 
429
380
 
@@ -443,3 +394,49 @@ def build_output(*args, **kwargs) -> OutputSpecs:
443
394
  The output specifications object.
444
395
  """
445
396
  return OutputSpecs(*args, **kwargs)
397
+
398
+
399
+ def flatten_list_of_sets(list_of_sets: list[set]) -> set:
400
+ return {element for subset in list_of_sets for element in subset}
401
+
402
+
403
+ def mars_str_to_set(s: str) -> set[str]:
404
+ """Mars strings are like 1/to/2 or 1/to/2/by/1
405
+
406
+ Returns a set of strings, e.g. {'1', '2'}
407
+ """
408
+ assert "/" in s, "mars_str_to_set expects a string with '/'"
409
+ lst = s.split("/")
410
+ assert len(lst) in (3, 5), f"mars_str_to_set expects a string like 1/to/2 or 1/to/4/by/1, got {s}"
411
+ if len(lst) == 3:
412
+ assert "to" in lst
413
+ start, _, end = lst
414
+ step = 1
415
+ elif len(lst) == 5:
416
+ assert "by" in lst and "to" in lst
417
+ start, _, end, _, step = lst
418
+ return {str(i) for i in range(int(start), int(end) + 1, int(step))}
419
+
420
+
421
+ def get_ensembles_set(obj):
422
+ """Counts the number of ensembles in the configuration."""
423
+ if isinstance(obj, dict):
424
+ if "number" in obj:
425
+ if isinstance(obj["number"], (list, tuple)):
426
+ return set([str(element) for element in obj["number"]])
427
+ if isinstance(obj["number"], (str, int)):
428
+ if "/" in str(obj["number"]):
429
+ return mars_str_to_set(obj["number"])
430
+ else:
431
+ return {str(obj["number"])}
432
+ if isinstance(obj, (dict)):
433
+ return flatten_list_of_sets([get_ensembles_set(v) for v in obj.values()])
434
+ if isinstance(obj, (list, tuple)):
435
+ return flatten_list_of_sets([get_ensembles_set(v) for v in obj])
436
+ return {}
437
+
438
+
439
+ def count_ensembles(config: Config) -> int:
440
+ """Counts the number of ensembles in the configuration."""
441
+ ensembles = get_ensembles_set(config.input)
442
+ return len(ensembles) if ensembles else 1
@@ -329,8 +329,7 @@ class FieldResult(Result):
329
329
  LOG.debug(f"Sorting done in {seconds_to_human(time.time()-start)}.")
330
330
  except ValueError:
331
331
  self.explain(ds, order_by, remapping=remapping, patches=patches)
332
- # raise ValueError(f"Error in {self}")
333
- exit(1)
332
+ raise ValueError(f"Error in {self}")
334
333
 
335
334
  if LOG.isEnabledFor(logging.DEBUG):
336
335
  LOG.debug("Cube shape: %s", cube)
@@ -497,7 +496,6 @@ class FieldResult(Result):
497
496
  print()
498
497
  print("❌" * 40)
499
498
  print()
500
- exit(1)
501
499
 
502
500
  def build_coords(self) -> None:
503
501
  """Build the coordinates for the result."""