anemoi-datasets 0.4.0__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.pre-commit-config.yaml +2 -2
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.readthedocs.yaml +0 -1
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/PKG-INFO +18 -3
- anemoi_datasets-0.4.3/docs/building/advanced-options.rst +3 -0
- anemoi_datasets-0.4.3/docs/building/sources/xarray.rst +6 -0
- anemoi_datasets-0.4.3/docs/building/sources/xarray.yaml +3 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources.rst +1 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/index.rst +1 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/pyproject.toml +21 -3
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/_version.py +2 -2
- anemoi_datasets-0.4.3/src/anemoi/datasets/commands/compare.py +105 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/commands/create.py +114 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/inspect.py +3 -3
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/__init__.py +43 -17
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/check.py +6 -5
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/chunks.py +1 -1
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/config.py +5 -26
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/rename.py +9 -1
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/__init__.py +47 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/accumulations.py +11 -41
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/constants.py +3 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/grib.py +4 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/hindcasts.py +105 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/mars.py +53 -22
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/netcdf.py +14 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/opendap.py +3 -2
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
- anemoi_datasets-0.4.0/src/anemoi/datasets/create/functions/sources/__init__.py → anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray_zarr.py +7 -0
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/zenodo.py +40 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/input.py +290 -172
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/loaders.py +120 -71
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/patch.py +17 -14
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/persistent.py +1 -1
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/size.py +4 -5
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/statistics/__init__.py +49 -16
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/template.py +11 -61
- anemoi_datasets-0.4.3/src/anemoi/datasets/create/trace.py +91 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/utils.py +0 -48
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/zarr.py +24 -10
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/misc.py +9 -37
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/stores.py +29 -14
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/dates/__init__.py +7 -1
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/dates/groups.py +3 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/PKG-INFO +18 -3
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/SOURCES.txt +20 -1
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/requires.txt +18 -2
- anemoi_datasets-0.4.3/tests/xarray/test_kerchunk.py +36 -0
- anemoi_datasets-0.4.3/tests/xarray/test_netcdf.py +55 -0
- anemoi_datasets-0.4.3/tests/xarray/test_opendap.py +24 -0
- anemoi_datasets-0.4.3/tests/xarray/test_zarr.py +54 -0
- anemoi_datasets-0.4.0/docs/requirements.txt +0 -10
- anemoi_datasets-0.4.0/src/anemoi/datasets/commands/compare.py +0 -46
- anemoi_datasets-0.4.0/src/anemoi/datasets/commands/create.py +0 -33
- anemoi_datasets-0.4.0/src/anemoi/datasets/create/functions/sources/hindcasts.py +0 -450
- anemoi_datasets-0.4.0/src/anemoi/datasets/create/functions/sources/netcdf.py +0 -72
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.github/workflows/python-publish.yml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.gitignore +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.vscode/spellright.dict +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/LICENSE +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/README.md +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/Makefile +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/_static/logo.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/_static/style.css +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/_templates/.gitkeep +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/apply-fmt.sh +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/empty.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/noop.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/rename.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/rotate_winds.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/select.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/unrotate_winds.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/handling-missing-dates.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/handling-missing-values.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/introduction.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/naming-variables.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/operations.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/accumulations.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/accumulations1.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/accumulations2.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/forcings.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/forcings.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/grib.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/hindcasts.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/mars.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/mars1.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/mars2.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/netcdf.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/netcdf.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/opendap.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/opendap.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/recentre.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib1.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib2.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib3.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib4.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/hindcasts.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/recentre.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/statistics.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/syntax.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/syntax.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/Makefile +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building1.txt +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building1.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building2.txt +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building2.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building3.txt +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building3.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/concat.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/hindcasts.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/input.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/missing_dates.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/nan.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/pipe.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/check-index.sh +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/compare.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/copy.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/create.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/inspect.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/introduction.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/scan.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/conf.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/images.pptx +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/installing.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/overview.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/overview_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/matrix.excalidraw +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/matrix.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/overview.excalidraw +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/overview.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/recipe.excalidraw +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/recipe.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/test.ipynb +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/area1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/area2_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/chain_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/combine_example.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/concat1.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/cutout_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/drop_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/end_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/ensembles1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/frequency_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/grids1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/join1.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching0_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching2_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching3_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching4_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/misc1.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/misc2.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/missing_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_cloud.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_combine1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_combine2_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_complex.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_dict_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_first_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_list_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_name.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_other.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_path.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_yaml_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/rename_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/reorder1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/reorder2_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/select1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/select2_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/shuffle_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/some_attributes_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/start_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/statistics_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/subset_example.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/thinning_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/zip1_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/zip2_.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/combining.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/configuration.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/configuration.toml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/grids.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/area-1.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/concat.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-1.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-2.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-3.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-4.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/join.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/overlay.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/thinning-after.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/thinning-before.png +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/introduction.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/matching.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/methods.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/miscellaneous.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/opening.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/other.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/selecting.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/statistics.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/subsetting.rst +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/setup.cfg +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/__main__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/copy.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/scan.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/compute/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/compute/recentre.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/empty.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/noop.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/unrotate_winds.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/empty.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/forcings.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/recentre.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/source.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/tendencies.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/statistics/summary.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/writer.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/concat.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/dataset.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/debug.css +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/debug.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/ensemble.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/forwards.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/grids.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/indexing.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/join.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/masked.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/select.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/statistics.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/subset.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/unchecked.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/grids.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/utils/__init__.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/dependency_links.txt +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/entry_points.txt +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/top_level.txt +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/concat.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/data_sources.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/join.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/missing.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/nan.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/pipe.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/recentre.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/test_create.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create-perturbations-full.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create-shift.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_chunks.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_data.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_dates.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_indexing.py +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/.gitignore +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/examples/Makefile +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/examples/an-oper-2023-2023-2p5-6h-v1.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/Makefile +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/grids.ipynb +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/grids1.yaml +0 -0
- {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/grids2.yaml +0 -0
|
@@ -37,8 +37,8 @@ repos:
|
|
|
37
37
|
rev: v0.4.6
|
|
38
38
|
hooks:
|
|
39
39
|
- id: ruff
|
|
40
|
-
# Next line
|
|
41
|
-
exclude: '
|
|
40
|
+
# Next line is to exclude for documentation code snippets
|
|
41
|
+
exclude: 'docs/(.*/)?[a-z]\w+_.py$'
|
|
42
42
|
args:
|
|
43
43
|
- --line-length=120
|
|
44
44
|
- --fix
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: anemoi-datasets
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: A package to hold various functions to support training of ML models on ECMWF data.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License: Apache License
|
|
@@ -223,19 +223,22 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
|
223
223
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
224
224
|
Requires-Python: >=3.9
|
|
225
225
|
License-File: LICENSE
|
|
226
|
-
Requires-Dist: anemoi-utils[provenance]>=0.3.
|
|
226
|
+
Requires-Dist: anemoi-utils[provenance]>=0.3.13
|
|
227
227
|
Requires-Dist: numpy
|
|
228
228
|
Requires-Dist: pyyaml
|
|
229
229
|
Requires-Dist: semantic-version
|
|
230
230
|
Requires-Dist: tqdm
|
|
231
|
-
Requires-Dist: zarr
|
|
231
|
+
Requires-Dist: zarr
|
|
232
232
|
Provides-Extra: all
|
|
233
|
+
Requires-Dist: aiohttp; extra == "all"
|
|
233
234
|
Requires-Dist: boto3; extra == "all"
|
|
234
235
|
Requires-Dist: earthkit-data[mars]>=0.9; extra == "all"
|
|
235
236
|
Requires-Dist: earthkit-geo>=0.2; extra == "all"
|
|
236
237
|
Requires-Dist: earthkit-meteo; extra == "all"
|
|
237
238
|
Requires-Dist: ecmwflibs>=0.6.3; extra == "all"
|
|
238
239
|
Requires-Dist: entrypoints; extra == "all"
|
|
240
|
+
Requires-Dist: gcsfs; extra == "all"
|
|
241
|
+
Requires-Dist: kerchunk; extra == "all"
|
|
239
242
|
Requires-Dist: pyproj; extra == "all"
|
|
240
243
|
Requires-Dist: requests; extra == "all"
|
|
241
244
|
Requires-Dist: s3fs; extra == "all"
|
|
@@ -247,26 +250,38 @@ Requires-Dist: ecmwflibs>=0.6.3; extra == "create"
|
|
|
247
250
|
Requires-Dist: entrypoints; extra == "create"
|
|
248
251
|
Requires-Dist: pyproj; extra == "create"
|
|
249
252
|
Provides-Extra: dev
|
|
253
|
+
Requires-Dist: aiohttp; extra == "dev"
|
|
250
254
|
Requires-Dist: boto3; extra == "dev"
|
|
251
255
|
Requires-Dist: earthkit-data[mars]>=0.9; extra == "dev"
|
|
252
256
|
Requires-Dist: earthkit-geo>=0.2; extra == "dev"
|
|
253
257
|
Requires-Dist: earthkit-meteo; extra == "dev"
|
|
254
258
|
Requires-Dist: ecmwflibs>=0.6.3; extra == "dev"
|
|
255
259
|
Requires-Dist: entrypoints; extra == "dev"
|
|
260
|
+
Requires-Dist: gcsfs; extra == "dev"
|
|
261
|
+
Requires-Dist: kerchunk; extra == "dev"
|
|
256
262
|
Requires-Dist: nbsphinx; extra == "dev"
|
|
257
263
|
Requires-Dist: pandoc; extra == "dev"
|
|
258
264
|
Requires-Dist: pyproj; extra == "dev"
|
|
259
265
|
Requires-Dist: pytest; extra == "dev"
|
|
260
266
|
Requires-Dist: requests; extra == "dev"
|
|
267
|
+
Requires-Dist: rstfmt; extra == "dev"
|
|
261
268
|
Requires-Dist: s3fs; extra == "dev"
|
|
262
269
|
Requires-Dist: sphinx; extra == "dev"
|
|
270
|
+
Requires-Dist: sphinx-argparse<0.5; extra == "dev"
|
|
263
271
|
Requires-Dist: sphinx-rtd-theme; extra == "dev"
|
|
264
272
|
Provides-Extra: docs
|
|
265
273
|
Requires-Dist: nbsphinx; extra == "docs"
|
|
266
274
|
Requires-Dist: pandoc; extra == "docs"
|
|
275
|
+
Requires-Dist: rstfmt; extra == "docs"
|
|
267
276
|
Requires-Dist: sphinx; extra == "docs"
|
|
277
|
+
Requires-Dist: sphinx-argparse<0.5; extra == "docs"
|
|
268
278
|
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
279
|
+
Provides-Extra: kerchunk
|
|
280
|
+
Requires-Dist: gcsfs; extra == "kerchunk"
|
|
281
|
+
Requires-Dist: kerchunk; extra == "kerchunk"
|
|
282
|
+
Requires-Dist: s3fs; extra == "kerchunk"
|
|
269
283
|
Provides-Extra: remote
|
|
284
|
+
Requires-Dist: aiohttp; extra == "remote"
|
|
270
285
|
Requires-Dist: boto3; extra == "remote"
|
|
271
286
|
Requires-Dist: requests; extra == "remote"
|
|
272
287
|
Requires-Dist: s3fs; extra == "remote"
|
|
@@ -50,22 +50,24 @@ dynamic = [
|
|
|
50
50
|
"version",
|
|
51
51
|
]
|
|
52
52
|
dependencies = [
|
|
53
|
-
"anemoi-utils[provenance]>=0.3.
|
|
53
|
+
"anemoi-utils[provenance]>=0.3.13",
|
|
54
54
|
"numpy",
|
|
55
55
|
"pyyaml",
|
|
56
56
|
"semantic-version",
|
|
57
57
|
"tqdm",
|
|
58
|
-
"zarr
|
|
59
|
-
|
|
58
|
+
"zarr",
|
|
60
59
|
]
|
|
61
60
|
|
|
62
61
|
optional-dependencies.all = [
|
|
62
|
+
"aiohttp",
|
|
63
63
|
"boto3",
|
|
64
64
|
"earthkit-data[mars]>=0.9",
|
|
65
65
|
"earthkit-geo>=0.2",
|
|
66
66
|
"earthkit-meteo",
|
|
67
67
|
"ecmwflibs>=0.6.3",
|
|
68
68
|
"entrypoints",
|
|
69
|
+
"gcsfs",
|
|
70
|
+
"kerchunk",
|
|
69
71
|
"pyproj",
|
|
70
72
|
"requests",
|
|
71
73
|
"s3fs",
|
|
@@ -81,37 +83,53 @@ optional-dependencies.create = [
|
|
|
81
83
|
]
|
|
82
84
|
|
|
83
85
|
optional-dependencies.dev = [
|
|
86
|
+
"aiohttp",
|
|
84
87
|
"boto3",
|
|
85
88
|
"earthkit-data[mars]>=0.9",
|
|
86
89
|
"earthkit-geo>=0.2",
|
|
87
90
|
"earthkit-meteo",
|
|
88
91
|
"ecmwflibs>=0.6.3",
|
|
89
92
|
"entrypoints",
|
|
93
|
+
"gcsfs",
|
|
94
|
+
"kerchunk",
|
|
90
95
|
"nbsphinx",
|
|
91
96
|
"pandoc",
|
|
92
97
|
"pyproj",
|
|
93
98
|
"pytest",
|
|
94
99
|
"requests",
|
|
100
|
+
"rstfmt",
|
|
95
101
|
"s3fs",
|
|
96
102
|
"sphinx",
|
|
103
|
+
"sphinx-argparse<0.5",
|
|
97
104
|
"sphinx-rtd-theme",
|
|
98
105
|
]
|
|
99
106
|
|
|
100
107
|
optional-dependencies.docs = [
|
|
101
108
|
"nbsphinx",
|
|
102
109
|
"pandoc",
|
|
110
|
+
"rstfmt",
|
|
103
111
|
"sphinx",
|
|
112
|
+
"sphinx-argparse<0.5",
|
|
104
113
|
"sphinx-rtd-theme",
|
|
105
114
|
]
|
|
106
115
|
|
|
116
|
+
optional-dependencies.kerchunk = [
|
|
117
|
+
"gcsfs",
|
|
118
|
+
"kerchunk",
|
|
119
|
+
"s3fs",
|
|
120
|
+
]
|
|
121
|
+
|
|
107
122
|
optional-dependencies.remote = [
|
|
123
|
+
"aiohttp",
|
|
108
124
|
"boto3",
|
|
109
125
|
"requests",
|
|
110
126
|
"s3fs",
|
|
111
127
|
]
|
|
128
|
+
|
|
112
129
|
optional-dependencies.tests = [
|
|
113
130
|
"pytest",
|
|
114
131
|
]
|
|
132
|
+
|
|
115
133
|
urls.Documentation = "https://anemoi-datasets.readthedocs.io/"
|
|
116
134
|
urls.Homepage = "https://github.com/ecmwf/anemoi-datasets/"
|
|
117
135
|
urls.Issues = "https://github.com/ecmwf/anemoi-datasets/issues"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# (C) Copyright 2024 ECMWF.
|
|
3
|
+
#
|
|
4
|
+
# This software is licensed under the terms of the Apache Licence Version 2.0
|
|
5
|
+
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
|
|
6
|
+
# In applying this licence, ECMWF does not waive the privileges and immunities
|
|
7
|
+
# granted to it by virtue of its status as an intergovernmental organisation
|
|
8
|
+
# nor does it submit to any jurisdiction.
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import tqdm
|
|
13
|
+
import zarr
|
|
14
|
+
|
|
15
|
+
from anemoi.datasets import open_dataset
|
|
16
|
+
|
|
17
|
+
from . import Command
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Compare(Command):
    """Compare two datasets. This command compares the variables in two datasets and prints the mean of the common variables. It does not compare the data itself (yet)."""

    # NOTE(review): the class docstring above is kept verbatim in case it is
    # surfaced as help text, but it is out of date: with ``--data`` the rows
    # and several stored zarr arrays are compared as well.

    def add_arguments(self, command_parser):
        """Register the two dataset arguments and the comparison flags."""
        command_parser.add_argument("dataset1")
        command_parser.add_argument("dataset2")
        command_parser.add_argument("--data", action="store_true", help="Compare the data.")
        # NOTE(review): ``--statistics`` is parsed but never read in ``run``.
        command_parser.add_argument("--statistics", action="store_true", help="Compare the statistics.")

    def run(self, args):
        """Print the differences between ``args.dataset1`` and ``args.dataset2``.

        Always prints the variables found in only one dataset and, for the
        common variables, the ``mean`` statistic of each dataset. With
        ``--data``, also counts the rows that differ (NaNs at the same position
        are considered equal) and compares a fixed list of arrays read directly
        from the two zarr stores.
        """
        ds1 = open_dataset(args.dataset1)
        ds2 = open_dataset(args.dataset2)

        v1 = set(ds1.variables)
        v2 = set(ds2.variables)

        print("Only in dataset 1:", ", ".join(sorted(v1 - v2)))
        print("Only in dataset 2:", ", ".join(sorted(v2 - v1)))
        print()
        common = sorted(v1 & v2)
        print("Common:")
        print("-------")
        print()

        for v in common:
            print(
                f"{v:14}",
                f"{ds1.statistics['mean'][ds1.name_to_index[v]]:14g}",
                f"{ds2.statistics['mean'][ds2.name_to_index[v]]:14g}",
            )

        if args.data:
            print()
            print("Data:")
            print("-----")
            print()

            n1, n2 = len(ds1), len(ds2)
            # zip() stops at the shorter dataset: report the mismatch instead of
            # silently ignoring the extra rows, and count against what was
            # actually compared.
            compared = min(n1, n2)
            if n1 != n2:
                print(f"data: lengths mismatch {n1} != {n2}, comparing the first {compared} rows only")

            diff = 0
            for a, b in tqdm.tqdm(zip(ds1, ds2), total=compared):
                if not np.array_equal(a, b, equal_nan=True):
                    diff += 1

            print(f"Number of different rows: {diff}/{compared}")

        if args.data:
            print()
            print("Data 2:")
            print("-----")
            print()

            # From here on the names refer to the raw zarr stores, not to the
            # objects returned by open_dataset().
            ds1 = zarr.open(args.dataset1, mode="r")
            ds2 = zarr.open(args.dataset2, mode="r")

            for name in (
                "data",
                "count",
                "sums",
                "squares",
                "mean",
                "stdev",
                "minimum",
                "maximum",
                "latitudes",
                "longitudes",
            ):
                a1 = ds1[name]
                a2 = ds2[name]

                if len(a1) != len(a2):
                    print(f"{name}: lengths mismatch {len(a1)} != {len(a2)}")
                    continue

                diff = 0
                for a, b in tqdm.tqdm(zip(a1, a2), total=len(a1), leave=False):
                    if not np.array_equal(a, b, equal_nan=True):
                        # Only the first differing row is printed in full
                        if diff == 0:
                            print(f"\n{name}: first different row:")
                            print(a[a != b])
                            print(b[a != b])

                        diff += 1

                print(f"{name}: {diff} different rows out of {len(a1)}")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Module-level name bound to the command class; presumably looked up by the
# command-line entry point to discover this command — TODO confirm.
command = Compare
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
6
|
+
from concurrent.futures import as_completed
|
|
7
|
+
|
|
8
|
+
import tqdm
|
|
9
|
+
from anemoi.utils.humanize import seconds_to_human
|
|
10
|
+
|
|
11
|
+
from anemoi.datasets.create.trace import enable_trace
|
|
12
|
+
|
|
13
|
+
from . import Command
|
|
14
|
+
|
|
15
|
+
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def task(what, options, *args, **kwargs):
    """Run one ``Creator`` step, importing ``Creator`` only where the task runs.

    The import is deliberately local so that it happens in the worker executing
    the task and not in the main process. ``what`` names the ``Creator`` method
    to call, ``options`` holds the keyword arguments used to build the
    ``Creator``, and ``*args``/``**kwargs`` are forwarded to that method, whose
    result is returned.
    """
    started = datetime.datetime.now()
    LOG.debug(f"Task {what}({args},{kwargs}) starting")

    from anemoi.datasets.create import Creator

    if "trace" in options:
        enable_trace(options["trace"])

    step = getattr(Creator(**options), what)
    outcome = step(*args, **kwargs)

    elapsed = datetime.datetime.now() - started
    LOG.debug(f"Task {what}({args},{kwargs}) completed ({elapsed})")
    return outcome
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Create(Command):
    """Create a dataset."""

    internal = True
    timestamp = True

    def add_arguments(self, command_parser):
        """Declare the options and positional arguments of the command."""
        add = command_parser.add_argument

        add(
            "--overwrite",
            action="store_true",
            help="Overwrite existing files. This will delete the target dataset if it already exists.",
        )
        add(
            "--test",
            action="store_true",
            help="Build a small dataset, using only the first dates. And, when possible, using low resolution and less ensemble members.",
        )
        add("config", help="Configuration yaml file defining the recipe to create the dataset.")
        add("path", help="Path to store the created data.")

        # --threads and --processes cannot be given together
        workers = command_parser.add_mutually_exclusive_group()
        workers.add_argument("--threads", help="Use `n` parallel thread workers.", type=int, default=0)
        workers.add_argument("--processes", help="Use `n` parallel process workers.", type=int, default=0)

        add("--trace", action="store_true")

    def run(self, args):
        """Create the dataset, in parallel if workers were requested, and log the elapsed time."""
        started = time.time()

        create = self.parallel_create if args.threads + args.processes else self.serial_create
        create(args)

        elapsed = time.time() - started
        LOG.info(f"Create completed in {seconds_to_human(elapsed)}")

    def serial_create(self, args):
        """Run the whole creation in the current process."""
        from anemoi.datasets.create import Creator

        Creator(**vars(args)).create()

    def parallel_create(self, args):
        """Run the creation steps through a thread or process pool.

        Some modules, like fsspec, do not work well with fork() and others may
        not be thread safe, so every step is submitted as a ``task`` to an
        executor, before any of these modules are imported here.
        """
        args.use_threads = args.threads > 0
        n_workers = args.threads + args.processes
        # vars() exposes the namespace's attributes, including use_threads set above
        options = vars(args)

        pool_class = ThreadPoolExecutor if args.use_threads else ProcessPoolExecutor

        # "init" returns the number of parts to load
        with pool_class(max_workers=1) as executor:
            total = executor.submit(task, "init", options).result()

        with pool_class(max_workers=n_workers) as executor:
            futures = [
                executor.submit(task, "load", options, parts=f"{part}/{total}") for part in range(1, total + 1)
            ]

            progress = tqdm.tqdm(
                as_completed(futures),
                desc="Loading",
                total=len(futures),
                colour="green",
                position=n_workers + 1,
            )
            for future in progress:
                # result() re-raises any exception raised by the worker
                future.result()

        # The closing steps run one after the other, in this order
        with pool_class(max_workers=1) as executor:
            for step in ("statistics", "additions", "cleanup", "verify"):
                executor.submit(task, step, options).result()
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Module-level name bound to the command class; presumably looked up by the
# command-line entry point to discover this command — TODO confirm.
command = Create
|
|
@@ -16,7 +16,7 @@ import numpy as np
|
|
|
16
16
|
import semantic_version
|
|
17
17
|
import tqdm
|
|
18
18
|
from anemoi.utils.humanize import bytes
|
|
19
|
-
from anemoi.utils.humanize import
|
|
19
|
+
from anemoi.utils.humanize import bytes_to_human
|
|
20
20
|
from anemoi.utils.humanize import when
|
|
21
21
|
from anemoi.utils.text import dotted_line
|
|
22
22
|
from anemoi.utils.text import progress
|
|
@@ -215,9 +215,9 @@ class Version:
|
|
|
215
215
|
total_size, n = compute_directory_size(self.path)
|
|
216
216
|
|
|
217
217
|
if total_size is not None:
|
|
218
|
-
print(f"💽 Size : {bytes(total_size)} ({
|
|
218
|
+
print(f"💽 Size : {bytes(total_size)} ({bytes_to_human(total_size)})")
|
|
219
219
|
if n is not None:
|
|
220
|
-
print(f"📁 Files : {
|
|
220
|
+
print(f"📁 Files : {n:,}")
|
|
221
221
|
|
|
222
222
|
@property
|
|
223
223
|
def statistics(self):
|
|
@@ -7,8 +7,15 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
#
|
|
9
9
|
|
|
10
|
+
import logging
|
|
10
11
|
import os
|
|
11
12
|
|
|
13
|
+
LOG = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _ignore(*args, **kwargs):
|
|
17
|
+
pass
|
|
18
|
+
|
|
12
19
|
|
|
13
20
|
class Creator:
|
|
14
21
|
def __init__(
|
|
@@ -16,19 +23,21 @@ class Creator:
|
|
|
16
23
|
path,
|
|
17
24
|
config=None,
|
|
18
25
|
cache=None,
|
|
19
|
-
|
|
26
|
+
use_threads=False,
|
|
20
27
|
statistics_tmp=None,
|
|
21
28
|
overwrite=False,
|
|
22
29
|
test=None,
|
|
30
|
+
progress=None,
|
|
23
31
|
**kwargs,
|
|
24
32
|
):
|
|
25
33
|
self.path = path # Output path
|
|
26
34
|
self.config = config
|
|
27
35
|
self.cache = cache
|
|
28
|
-
self.
|
|
36
|
+
self.use_threads = use_threads
|
|
29
37
|
self.statistics_tmp = statistics_tmp
|
|
30
38
|
self.overwrite = overwrite
|
|
31
39
|
self.test = test
|
|
40
|
+
self.progress = progress if progress is not None else _ignore
|
|
32
41
|
|
|
33
42
|
def init(self, check_name=False):
|
|
34
43
|
# check path
|
|
@@ -44,10 +53,11 @@ class Creator:
|
|
|
44
53
|
path=self.path,
|
|
45
54
|
config=self.config,
|
|
46
55
|
statistics_tmp=self.statistics_tmp,
|
|
47
|
-
|
|
56
|
+
use_threads=self.use_threads,
|
|
57
|
+
progress=self.progress,
|
|
48
58
|
test=self.test,
|
|
49
59
|
)
|
|
50
|
-
obj.initialise(check_name=check_name)
|
|
60
|
+
return obj.initialise(check_name=check_name)
|
|
51
61
|
|
|
52
62
|
def load(self, parts=None):
|
|
53
63
|
from .loaders import ContentLoader
|
|
@@ -56,7 +66,8 @@ class Creator:
|
|
|
56
66
|
loader = ContentLoader.from_dataset_config(
|
|
57
67
|
path=self.path,
|
|
58
68
|
statistics_tmp=self.statistics_tmp,
|
|
59
|
-
|
|
69
|
+
use_threads=self.use_threads,
|
|
70
|
+
progress=self.progress,
|
|
60
71
|
parts=parts,
|
|
61
72
|
)
|
|
62
73
|
loader.load()
|
|
@@ -66,7 +77,8 @@ class Creator:
|
|
|
66
77
|
|
|
67
78
|
loader = StatisticsAdder.from_dataset(
|
|
68
79
|
path=self.path,
|
|
69
|
-
|
|
80
|
+
use_threads=self.use_threads,
|
|
81
|
+
progress=self.progress,
|
|
70
82
|
statistics_tmp=self.statistics_tmp,
|
|
71
83
|
statistics_output=output,
|
|
72
84
|
recompute=False,
|
|
@@ -74,20 +86,21 @@ class Creator:
|
|
|
74
86
|
statistics_end=end,
|
|
75
87
|
)
|
|
76
88
|
loader.run()
|
|
89
|
+
assert loader.ready()
|
|
77
90
|
|
|
78
91
|
def size(self):
|
|
79
92
|
from .loaders import DatasetHandler
|
|
80
93
|
from .size import compute_directory_sizes
|
|
81
94
|
|
|
82
95
|
metadata = compute_directory_sizes(self.path)
|
|
83
|
-
handle = DatasetHandler.from_dataset(path=self.path,
|
|
96
|
+
handle = DatasetHandler.from_dataset(path=self.path, use_threads=self.use_threads)
|
|
84
97
|
handle.update_metadata(**metadata)
|
|
85
98
|
|
|
86
99
|
def cleanup(self):
|
|
87
100
|
from .loaders import DatasetHandlerWithStatistics
|
|
88
101
|
|
|
89
102
|
cleaner = DatasetHandlerWithStatistics.from_dataset(
|
|
90
|
-
path=self.path,
|
|
103
|
+
path=self.path, use_threads=self.use_threads, progress=self.progress, statistics_tmp=self.statistics_tmp
|
|
91
104
|
)
|
|
92
105
|
cleaner.tmp_statistics.delete()
|
|
93
106
|
cleaner.registry.clean()
|
|
@@ -103,15 +116,17 @@ class Creator:
|
|
|
103
116
|
from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
|
|
104
117
|
|
|
105
118
|
if statistics:
|
|
106
|
-
a = StatisticsAddition.from_dataset(path=self.path,
|
|
119
|
+
a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
|
|
107
120
|
a.initialise()
|
|
108
121
|
|
|
109
122
|
for d in delta:
|
|
110
123
|
try:
|
|
111
|
-
a = TendenciesStatisticsAddition.from_dataset(
|
|
124
|
+
a = TendenciesStatisticsAddition.from_dataset(
|
|
125
|
+
path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
|
|
126
|
+
)
|
|
112
127
|
a.initialise()
|
|
113
128
|
except TendenciesStatisticsDeltaNotMultipleOfFrequency:
|
|
114
|
-
|
|
129
|
+
LOG.info(f"Skipping delta={d} as it is not a multiple of the frequency.")
|
|
115
130
|
|
|
116
131
|
def run_additions(self, parts=None, delta=[1, 3, 6, 12, 24], statistics=True):
|
|
117
132
|
from .loaders import StatisticsAddition
|
|
@@ -119,15 +134,17 @@ class Creator:
|
|
|
119
134
|
from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
|
|
120
135
|
|
|
121
136
|
if statistics:
|
|
122
|
-
a = StatisticsAddition.from_dataset(path=self.path,
|
|
137
|
+
a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
|
|
123
138
|
a.run(parts)
|
|
124
139
|
|
|
125
140
|
for d in delta:
|
|
126
141
|
try:
|
|
127
|
-
a = TendenciesStatisticsAddition.from_dataset(
|
|
142
|
+
a = TendenciesStatisticsAddition.from_dataset(
|
|
143
|
+
path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
|
|
144
|
+
)
|
|
128
145
|
a.run(parts)
|
|
129
146
|
except TendenciesStatisticsDeltaNotMultipleOfFrequency:
|
|
130
|
-
|
|
147
|
+
LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
|
|
131
148
|
|
|
132
149
|
def finalise_additions(self, delta=[1, 3, 6, 12, 24], statistics=True):
|
|
133
150
|
from .loaders import StatisticsAddition
|
|
@@ -135,15 +152,17 @@ class Creator:
|
|
|
135
152
|
from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
|
|
136
153
|
|
|
137
154
|
if statistics:
|
|
138
|
-
a = StatisticsAddition.from_dataset(path=self.path,
|
|
155
|
+
a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
|
|
139
156
|
a.finalise()
|
|
140
157
|
|
|
141
158
|
for d in delta:
|
|
142
159
|
try:
|
|
143
|
-
a = TendenciesStatisticsAddition.from_dataset(
|
|
160
|
+
a = TendenciesStatisticsAddition.from_dataset(
|
|
161
|
+
path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
|
|
162
|
+
)
|
|
144
163
|
a.finalise()
|
|
145
164
|
except TendenciesStatisticsDeltaNotMultipleOfFrequency:
|
|
146
|
-
|
|
165
|
+
LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
|
|
147
166
|
|
|
148
167
|
def finalise(self, **kwargs):
|
|
149
168
|
self.statistics(**kwargs)
|
|
@@ -174,3 +193,10 @@ class Creator:
|
|
|
174
193
|
return True
|
|
175
194
|
except zarr.errors.PathNotFoundError:
|
|
176
195
|
return False
|
|
196
|
+
|
|
197
|
+
def verify(self):
    """Build a ``DatasetVerifier`` for ``self.path`` and run its ``verify()``."""
    from .loaders import DatasetVerifier

    verifier = DatasetVerifier.from_dataset(
        path=self.path,
        use_threads=self.use_threads,
    )
    verifier.verify()
|
|
@@ -56,7 +56,7 @@ class DatasetName:
|
|
|
56
56
|
raise ValueError(self.error_message)
|
|
57
57
|
|
|
58
58
|
def _parse(self, name):
|
|
59
|
-
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?(
|
|
59
|
+
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
|
|
60
60
|
match = re.match(pattern, name)
|
|
61
61
|
|
|
62
62
|
assert match, (name, pattern)
|
|
@@ -136,18 +136,19 @@ class StatisticsValueError(ValueError):
|
|
|
136
136
|
pass
|
|
137
137
|
|
|
138
138
|
|
|
139
|
-
def check_data_values(arr, *, name: str, log=[],
|
|
140
|
-
if allow_nan is False:
|
|
141
|
-
allow_nan = lambda x: False # noqa: E731
|
|
139
|
+
def check_data_values(arr, *, name: str, log=[], allow_nans=False):
|
|
142
140
|
|
|
143
|
-
if
|
|
141
|
+
if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
|
|
144
142
|
arr = arr[~np.isnan(arr)]
|
|
145
143
|
|
|
144
|
+
assert arr.size > 0, (name, *log)
|
|
145
|
+
|
|
146
146
|
min, max = arr.min(), arr.max()
|
|
147
147
|
assert not (np.isnan(arr).any()), (name, min, max, *log)
|
|
148
148
|
|
|
149
149
|
if min == 9999.0:
|
|
150
150
|
warnings.warn(f"Min value 9999 for {name}")
|
|
151
|
+
|
|
151
152
|
if max == 9999.0:
|
|
152
153
|
warnings.warn(f"Max value 9999 for {name}")
|
|
153
154
|
|