anemoi-datasets 0.4.0__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.pre-commit-config.yaml +2 -2
  2. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.readthedocs.yaml +0 -1
  3. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/PKG-INFO +18 -3
  4. anemoi_datasets-0.4.3/docs/building/advanced-options.rst +3 -0
  5. anemoi_datasets-0.4.3/docs/building/sources/xarray.rst +6 -0
  6. anemoi_datasets-0.4.3/docs/building/sources/xarray.yaml +3 -0
  7. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources.rst +1 -0
  8. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/index.rst +1 -0
  9. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/pyproject.toml +21 -3
  10. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/_version.py +2 -2
  11. anemoi_datasets-0.4.3/src/anemoi/datasets/commands/compare.py +105 -0
  12. anemoi_datasets-0.4.3/src/anemoi/datasets/commands/create.py +114 -0
  13. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/inspect.py +3 -3
  14. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/__init__.py +43 -17
  15. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/check.py +6 -5
  16. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/chunks.py +1 -1
  17. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/config.py +5 -26
  18. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/rename.py +9 -1
  19. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/rotate_winds.py +10 -1
  20. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/__init__.py +47 -0
  21. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/accumulations.py +11 -41
  22. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/constants.py +3 -0
  23. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/grib.py +4 -0
  24. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/hindcasts.py +105 -0
  25. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/mars.py +53 -22
  26. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/netcdf.py +14 -0
  27. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/opendap.py +3 -2
  28. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
  29. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
  30. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
  31. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
  32. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
  33. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
  34. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
  35. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
  36. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
  37. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
  38. anemoi_datasets-0.4.0/src/anemoi/datasets/create/functions/sources/__init__.py → anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/xarray_zarr.py +7 -0
  39. anemoi_datasets-0.4.3/src/anemoi/datasets/create/functions/sources/zenodo.py +40 -0
  40. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/input.py +290 -172
  41. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/loaders.py +120 -71
  42. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/patch.py +17 -14
  43. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/persistent.py +1 -1
  44. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/size.py +4 -5
  45. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/statistics/__init__.py +49 -16
  46. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/template.py +11 -61
  47. anemoi_datasets-0.4.3/src/anemoi/datasets/create/trace.py +91 -0
  48. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/utils.py +0 -48
  49. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/zarr.py +24 -10
  50. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/misc.py +9 -37
  51. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/stores.py +29 -14
  52. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/dates/__init__.py +7 -1
  53. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/dates/groups.py +3 -0
  54. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/PKG-INFO +18 -3
  55. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/SOURCES.txt +20 -1
  56. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/requires.txt +18 -2
  57. anemoi_datasets-0.4.3/tests/xarray/test_kerchunk.py +36 -0
  58. anemoi_datasets-0.4.3/tests/xarray/test_netcdf.py +55 -0
  59. anemoi_datasets-0.4.3/tests/xarray/test_opendap.py +24 -0
  60. anemoi_datasets-0.4.3/tests/xarray/test_zarr.py +54 -0
  61. anemoi_datasets-0.4.0/docs/requirements.txt +0 -10
  62. anemoi_datasets-0.4.0/src/anemoi/datasets/commands/compare.py +0 -46
  63. anemoi_datasets-0.4.0/src/anemoi/datasets/commands/create.py +0 -33
  64. anemoi_datasets-0.4.0/src/anemoi/datasets/create/functions/sources/hindcasts.py +0 -450
  65. anemoi_datasets-0.4.0/src/anemoi/datasets/create/functions/sources/netcdf.py +0 -72
  66. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.github/workflows/python-publish.yml +0 -0
  67. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.gitignore +0 -0
  68. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/.vscode/spellright.dict +0 -0
  69. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/LICENSE +0 -0
  70. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/README.md +0 -0
  71. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/Makefile +0 -0
  72. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/_static/logo.png +0 -0
  73. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/_static/style.css +0 -0
  74. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/_templates/.gitkeep +0 -0
  75. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/apply-fmt.sh +0 -0
  76. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/empty.rst +0 -0
  77. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/noop.rst +0 -0
  78. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/rename.rst +0 -0
  79. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/rotate_winds.rst +0 -0
  80. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/select.rst +0 -0
  81. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters/unrotate_winds.rst +0 -0
  82. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/filters.rst +0 -0
  83. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/handling-missing-dates.rst +0 -0
  84. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/handling-missing-values.rst +0 -0
  85. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/introduction.rst +0 -0
  86. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/naming-variables.rst +0 -0
  87. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/operations.rst +0 -0
  88. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/accumulations.rst +0 -0
  89. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/accumulations1.yaml +0 -0
  90. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/accumulations2.yaml +0 -0
  91. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/forcings.rst +0 -0
  92. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/forcings.yaml +0 -0
  93. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/grib.rst +0 -0
  94. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/hindcasts.rst +0 -0
  95. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/mars.rst +0 -0
  96. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/mars1.yaml +0 -0
  97. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/mars2.yaml +0 -0
  98. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/netcdf.rst +0 -0
  99. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/netcdf.yaml +0 -0
  100. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/opendap.rst +0 -0
  101. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/opendap.yaml +0 -0
  102. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/recentre.rst +0 -0
  103. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib1.yaml +0 -0
  104. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib2.yaml +0 -0
  105. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib3.yaml +0 -0
  106. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/grib4.yaml +0 -0
  107. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/hindcasts.yaml +0 -0
  108. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/sources/yaml/recentre.yaml +0 -0
  109. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/statistics.rst +0 -0
  110. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/syntax.rst +0 -0
  111. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/syntax.yaml +0 -0
  112. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/Makefile +0 -0
  113. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building1.txt +0 -0
  114. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building1.yaml +0 -0
  115. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building2.txt +0 -0
  116. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building2.yaml +0 -0
  117. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building3.txt +0 -0
  118. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/building3.yaml +0 -0
  119. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/concat.yaml +0 -0
  120. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/hindcasts.yaml +0 -0
  121. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/input.yaml +0 -0
  122. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/missing_dates.yaml +0 -0
  123. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/nan.yaml +0 -0
  124. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/building/yaml/pipe.yaml +0 -0
  125. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/check-index.sh +0 -0
  126. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/compare.rst +0 -0
  127. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/copy.rst +0 -0
  128. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/create.rst +0 -0
  129. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/inspect.rst +0 -0
  130. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/introduction.rst +0 -0
  131. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/cli/scan.rst +0 -0
  132. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/conf.py +0 -0
  133. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/images.pptx +0 -0
  134. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/installing.rst +0 -0
  135. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/overview.rst +0 -0
  136. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/overview_.py +0 -0
  137. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/matrix.excalidraw +0 -0
  138. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/matrix.png +0 -0
  139. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/overview.excalidraw +0 -0
  140. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/overview.png +0 -0
  141. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/recipe.excalidraw +0 -0
  142. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/schemas/recipe.png +0 -0
  143. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/test.ipynb +0 -0
  144. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/area1_.py +0 -0
  145. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/area2_.py +0 -0
  146. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/chain_.py +0 -0
  147. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/combine_example.py +0 -0
  148. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/concat1.py +0 -0
  149. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/cutout_.py +0 -0
  150. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/drop_.py +0 -0
  151. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/end_.py +0 -0
  152. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/ensembles1_.py +0 -0
  153. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/frequency_.py +0 -0
  154. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/grids1_.py +0 -0
  155. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/join1.py +0 -0
  156. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching0_.py +0 -0
  157. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching1_.py +0 -0
  158. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching2_.py +0 -0
  159. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching3_.py +0 -0
  160. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/matching4_.py +0 -0
  161. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/misc1.py +0 -0
  162. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/misc2.py +0 -0
  163. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/missing_.py +0 -0
  164. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_cloud.py +0 -0
  165. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_combine1_.py +0 -0
  166. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_combine2_.py +0 -0
  167. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_complex.py +0 -0
  168. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_dict_.py +0 -0
  169. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_first_.py +0 -0
  170. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_list_.py +0 -0
  171. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_name.py +0 -0
  172. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_other.py +0 -0
  173. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_path.py +0 -0
  174. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/open_yaml_.py +0 -0
  175. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/rename_.py +0 -0
  176. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/reorder1_.py +0 -0
  177. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/reorder2_.py +0 -0
  178. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/select1_.py +0 -0
  179. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/select2_.py +0 -0
  180. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/shuffle_.py +0 -0
  181. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/some_attributes_.py +0 -0
  182. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/start_.py +0 -0
  183. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/statistics_.py +0 -0
  184. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/subset_example.py +0 -0
  185. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/thinning_.py +0 -0
  186. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/zip1_.py +0 -0
  187. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/code/zip2_.py +0 -0
  188. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/combining.rst +0 -0
  189. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/configuration.rst +0 -0
  190. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/configuration.toml +0 -0
  191. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/grids.rst +0 -0
  192. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/area-1.png +0 -0
  193. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/concat.png +0 -0
  194. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-1.png +0 -0
  195. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-2.png +0 -0
  196. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-3.png +0 -0
  197. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/cutout-4.png +0 -0
  198. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/join.png +0 -0
  199. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/overlay.png +0 -0
  200. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/thinning-after.png +0 -0
  201. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/images/thinning-before.png +0 -0
  202. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/introduction.rst +0 -0
  203. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/matching.rst +0 -0
  204. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/methods.rst +0 -0
  205. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/miscellaneous.rst +0 -0
  206. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/opening.rst +0 -0
  207. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/other.rst +0 -0
  208. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/selecting.rst +0 -0
  209. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/statistics.rst +0 -0
  210. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/docs/using/subsetting.rst +0 -0
  211. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/setup.cfg +0 -0
  212. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/__init__.py +0 -0
  213. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/__main__.py +0 -0
  214. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/__init__.py +0 -0
  215. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/copy.py +0 -0
  216. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/commands/scan.py +0 -0
  217. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/compute/__init__.py +0 -0
  218. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/compute/recentre.py +0 -0
  219. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/__init__.py +0 -0
  220. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/__init__.py +0 -0
  221. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/empty.py +0 -0
  222. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/noop.py +0 -0
  223. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/filters/unrotate_winds.py +0 -0
  224. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/empty.py +0 -0
  225. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/forcings.py +0 -0
  226. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/recentre.py +0 -0
  227. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/source.py +0 -0
  228. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/functions/sources/tendencies.py +0 -0
  229. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/statistics/summary.py +0 -0
  230. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/create/writer.py +0 -0
  231. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/__init__.py +0 -0
  232. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/concat.py +0 -0
  233. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/dataset.py +0 -0
  234. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/debug.css +0 -0
  235. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/debug.py +0 -0
  236. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/ensemble.py +0 -0
  237. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/forwards.py +0 -0
  238. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/grids.py +0 -0
  239. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/indexing.py +0 -0
  240. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/join.py +0 -0
  241. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/masked.py +0 -0
  242. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/select.py +0 -0
  243. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/statistics.py +0 -0
  244. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/subset.py +0 -0
  245. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/data/unchecked.py +0 -0
  246. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/grids.py +0 -0
  247. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi/datasets/utils/__init__.py +0 -0
  248. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/dependency_links.txt +0 -0
  249. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/entry_points.txt +0 -0
  250. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/src/anemoi_datasets.egg-info/top_level.txt +0 -0
  251. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/concat.yaml +0 -0
  252. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/data_sources.yaml +0 -0
  253. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/join.yaml +0 -0
  254. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/missing.yaml +0 -0
  255. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/nan.yaml +0 -0
  256. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/pipe.yaml +0 -0
  257. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/recentre.yaml +0 -0
  258. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create/test_create.py +0 -0
  259. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create-perturbations-full.yaml +0 -0
  260. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/create-shift.yaml +0 -0
  261. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_chunks.py +0 -0
  262. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_data.py +0 -0
  263. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_dates.py +0 -0
  264. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tests/test_indexing.py +0 -0
  265. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/.gitignore +0 -0
  266. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/examples/Makefile +0 -0
  267. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/examples/an-oper-2023-2023-2p5-6h-v1.yaml +0 -0
  268. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/Makefile +0 -0
  269. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/grids.ipynb +0 -0
  270. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/grids1.yaml +0 -0
  271. {anemoi_datasets-0.4.0 → anemoi_datasets-0.4.3}/tools/grids/grids2.yaml +0 -0
@@ -37,8 +37,8 @@ repos:
37
37
  rev: v0.4.6
38
38
  hooks:
39
39
  - id: ruff
40
- # Next line if for documenation cod snippets
41
- exclude: '^[^_].*_\.py$'
40
+ # Next line is to exclude for documentation code snippets
41
+ exclude: 'docs/(.*/)?[a-z]\w+_.py$'
42
42
  args:
43
43
  - --line-length=120
44
44
  - --fix
@@ -10,7 +10,6 @@ sphinx:
10
10
 
11
11
  python:
12
12
  install:
13
- - requirements: docs/requirements.txt
14
13
  - method: pip
15
14
  path: .
16
15
  extra_requirements:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: anemoi-datasets
3
- Version: 0.4.0
3
+ Version: 0.4.3
4
4
  Summary: A package to hold various functions to support training of ML models on ECMWF data.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License: Apache License
@@ -223,19 +223,22 @@ Classifier: Programming Language :: Python :: Implementation :: CPython
223
223
  Classifier: Programming Language :: Python :: Implementation :: PyPy
224
224
  Requires-Python: >=3.9
225
225
  License-File: LICENSE
226
- Requires-Dist: anemoi-utils[provenance]>=0.3.5
226
+ Requires-Dist: anemoi-utils[provenance]>=0.3.13
227
227
  Requires-Dist: numpy
228
228
  Requires-Dist: pyyaml
229
229
  Requires-Dist: semantic-version
230
230
  Requires-Dist: tqdm
231
- Requires-Dist: zarr<=2.17
231
+ Requires-Dist: zarr
232
232
  Provides-Extra: all
233
+ Requires-Dist: aiohttp; extra == "all"
233
234
  Requires-Dist: boto3; extra == "all"
234
235
  Requires-Dist: earthkit-data[mars]>=0.9; extra == "all"
235
236
  Requires-Dist: earthkit-geo>=0.2; extra == "all"
236
237
  Requires-Dist: earthkit-meteo; extra == "all"
237
238
  Requires-Dist: ecmwflibs>=0.6.3; extra == "all"
238
239
  Requires-Dist: entrypoints; extra == "all"
240
+ Requires-Dist: gcsfs; extra == "all"
241
+ Requires-Dist: kerchunk; extra == "all"
239
242
  Requires-Dist: pyproj; extra == "all"
240
243
  Requires-Dist: requests; extra == "all"
241
244
  Requires-Dist: s3fs; extra == "all"
@@ -247,26 +250,38 @@ Requires-Dist: ecmwflibs>=0.6.3; extra == "create"
247
250
  Requires-Dist: entrypoints; extra == "create"
248
251
  Requires-Dist: pyproj; extra == "create"
249
252
  Provides-Extra: dev
253
+ Requires-Dist: aiohttp; extra == "dev"
250
254
  Requires-Dist: boto3; extra == "dev"
251
255
  Requires-Dist: earthkit-data[mars]>=0.9; extra == "dev"
252
256
  Requires-Dist: earthkit-geo>=0.2; extra == "dev"
253
257
  Requires-Dist: earthkit-meteo; extra == "dev"
254
258
  Requires-Dist: ecmwflibs>=0.6.3; extra == "dev"
255
259
  Requires-Dist: entrypoints; extra == "dev"
260
+ Requires-Dist: gcsfs; extra == "dev"
261
+ Requires-Dist: kerchunk; extra == "dev"
256
262
  Requires-Dist: nbsphinx; extra == "dev"
257
263
  Requires-Dist: pandoc; extra == "dev"
258
264
  Requires-Dist: pyproj; extra == "dev"
259
265
  Requires-Dist: pytest; extra == "dev"
260
266
  Requires-Dist: requests; extra == "dev"
267
+ Requires-Dist: rstfmt; extra == "dev"
261
268
  Requires-Dist: s3fs; extra == "dev"
262
269
  Requires-Dist: sphinx; extra == "dev"
270
+ Requires-Dist: sphinx-argparse<0.5; extra == "dev"
263
271
  Requires-Dist: sphinx-rtd-theme; extra == "dev"
264
272
  Provides-Extra: docs
265
273
  Requires-Dist: nbsphinx; extra == "docs"
266
274
  Requires-Dist: pandoc; extra == "docs"
275
+ Requires-Dist: rstfmt; extra == "docs"
267
276
  Requires-Dist: sphinx; extra == "docs"
277
+ Requires-Dist: sphinx-argparse<0.5; extra == "docs"
268
278
  Requires-Dist: sphinx-rtd-theme; extra == "docs"
279
+ Provides-Extra: kerchunk
280
+ Requires-Dist: gcsfs; extra == "kerchunk"
281
+ Requires-Dist: kerchunk; extra == "kerchunk"
282
+ Requires-Dist: s3fs; extra == "kerchunk"
269
283
  Provides-Extra: remote
284
+ Requires-Dist: aiohttp; extra == "remote"
270
285
  Requires-Dist: boto3; extra == "remote"
271
286
  Requires-Dist: requests; extra == "remote"
272
287
  Requires-Dist: s3fs; extra == "remote"
@@ -0,0 +1,3 @@
1
+ ##################
2
+ Advanced Options
3
+ ##################
@@ -0,0 +1,6 @@
1
+ ########
2
+ xarray
3
+ ########
4
+
5
+ .. literalinclude:: xarray.yaml
6
+ :language: yaml
@@ -0,0 +1,3 @@
1
+ input:
2
+ xarray:
3
+ url: https://...
@@ -23,6 +23,7 @@ The following `sources` are currently available:
23
23
  sources/mars
24
24
  sources/grib
25
25
  sources/netcdf
26
+ sources/xarray
26
27
  sources/opendap
27
28
  sources/forcings
28
29
  sources/accumulations
@@ -89,6 +89,7 @@ datasets <building-introduction>`.
89
89
  building/handling-missing-dates
90
90
  building/handling-missing-values
91
91
  building/statistics
92
+ building/advanced-options
92
93
 
93
94
  **Command line tool**
94
95
 
@@ -50,22 +50,24 @@ dynamic = [
50
50
  "version",
51
51
  ]
52
52
  dependencies = [
53
- "anemoi-utils[provenance]>=0.3.5",
53
+ "anemoi-utils[provenance]>=0.3.13",
54
54
  "numpy",
55
55
  "pyyaml",
56
56
  "semantic-version",
57
57
  "tqdm",
58
- "zarr<=2.17",
59
-
58
+ "zarr",
60
59
  ]
61
60
 
62
61
  optional-dependencies.all = [
62
+ "aiohttp",
63
63
  "boto3",
64
64
  "earthkit-data[mars]>=0.9",
65
65
  "earthkit-geo>=0.2",
66
66
  "earthkit-meteo",
67
67
  "ecmwflibs>=0.6.3",
68
68
  "entrypoints",
69
+ "gcsfs",
70
+ "kerchunk",
69
71
  "pyproj",
70
72
  "requests",
71
73
  "s3fs",
@@ -81,37 +83,53 @@ optional-dependencies.create = [
81
83
  ]
82
84
 
83
85
  optional-dependencies.dev = [
86
+ "aiohttp",
84
87
  "boto3",
85
88
  "earthkit-data[mars]>=0.9",
86
89
  "earthkit-geo>=0.2",
87
90
  "earthkit-meteo",
88
91
  "ecmwflibs>=0.6.3",
89
92
  "entrypoints",
93
+ "gcsfs",
94
+ "kerchunk",
90
95
  "nbsphinx",
91
96
  "pandoc",
92
97
  "pyproj",
93
98
  "pytest",
94
99
  "requests",
100
+ "rstfmt",
95
101
  "s3fs",
96
102
  "sphinx",
103
+ "sphinx-argparse<0.5",
97
104
  "sphinx-rtd-theme",
98
105
  ]
99
106
 
100
107
  optional-dependencies.docs = [
101
108
  "nbsphinx",
102
109
  "pandoc",
110
+ "rstfmt",
103
111
  "sphinx",
112
+ "sphinx-argparse<0.5",
104
113
  "sphinx-rtd-theme",
105
114
  ]
106
115
 
116
+ optional-dependencies.kerchunk = [
117
+ "gcsfs",
118
+ "kerchunk",
119
+ "s3fs",
120
+ ]
121
+
107
122
  optional-dependencies.remote = [
123
+ "aiohttp",
108
124
  "boto3",
109
125
  "requests",
110
126
  "s3fs",
111
127
  ]
128
+
112
129
  optional-dependencies.tests = [
113
130
  "pytest",
114
131
  ]
132
+
115
133
  urls.Documentation = "https://anemoi-datasets.readthedocs.io/"
116
134
  urls.Homepage = "https://github.com/ecmwf/anemoi-datasets/"
117
135
  urls.Issues = "https://github.com/ecmwf/anemoi-datasets/issues"
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.4.0'
16
- __version_tuple__ = version_tuple = (0, 4, 0)
15
+ __version__ = version = '0.4.3'
16
+ __version_tuple__ = version_tuple = (0, 4, 3)
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env python
2
+ # (C) Copyright 2024 ECMWF.
3
+ #
4
+ # This software is licensed under the terms of the Apache Licence Version 2.0
5
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
6
+ # In applying this licence, ECMWF does not waive the privileges and immunities
7
+ # granted to it by virtue of its status as an intergovernmental organisation
8
+ # nor does it submit to any jurisdiction.
9
+ #
10
+
11
+ import numpy as np
12
+ import tqdm
13
+ import zarr
14
+
15
+ from anemoi.datasets import open_dataset
16
+
17
+ from . import Command
18
+
19
+
20
class Compare(Command):
    """Compare two datasets.

    Always prints the variables present in only one of the datasets and the mean of each common variable.
    With ``--data`` the datasets are also compared row by row and the underlying Zarr arrays are compared one by one.
    With ``--statistics`` alone, only the stored statistics arrays are compared.
    """

    # Names of the statistics arrays looked up at the root of each Zarr store.
    _STATISTICS_ARRAYS = ("count", "sums", "squares", "mean", "stdev", "minimum", "maximum")

    # Arrays compared by ``--data`` (same names and order as before).
    _DATA_ARRAYS = ("data",) + _STATISTICS_ARRAYS + ("latitudes", "longitudes")

    def add_arguments(self, command_parser):
        """Declare the two dataset arguments and the optional comparison flags."""
        command_parser.add_argument("dataset1")
        command_parser.add_argument("dataset2")
        command_parser.add_argument("--data", action="store_true", help="Compare the data.")
        command_parser.add_argument("--statistics", action="store_true", help="Compare the statistics.")

    def run(self, args):
        """Print the variable summary, then the comparisons selected by ``args``.

        ``args.dataset1`` and ``args.dataset2`` are passed to ``open_dataset`` for the summary
        and the row comparison, and to ``zarr.open`` for the array-by-array comparison.
        """
        ds1 = open_dataset(args.dataset1)
        ds2 = open_dataset(args.dataset2)

        v1 = set(ds1.variables)
        v2 = set(ds2.variables)

        print("Only in dataset 1:", ", ".join(sorted(v1 - v2)))
        print("Only in dataset 2:", ", ".join(sorted(v2 - v1)))
        print()
        print("Common:")
        print("-------")
        print()

        for v in sorted(v1 & v2):
            print(
                f"{v:14}",
                f"{ds1.statistics['mean'][ds1.name_to_index[v]]:14g}",
                f"{ds2.statistics['mean'][ds2.name_to_index[v]]:14g}",
            )

        if args.data:
            self._compare_rows(ds1, ds2)
            # ``--data`` keeps comparing every array, statistics included.
            self._compare_arrays(args.dataset1, args.dataset2, "Data 2:", "-----", self._DATA_ARRAYS)
        elif args.statistics:
            # Previously ``--statistics`` was accepted but had no effect.
            self._compare_arrays(
                args.dataset1, args.dataset2, "Statistics:", "-----------", self._STATISTICS_ARRAYS
            )

    @staticmethod
    def _compare_rows(ds1, ds2):
        """Count the rows that differ between two opened datasets (NaNs compare equal)."""
        print()
        print("Data:")
        print("-----")
        print()

        n1, n2 = len(ds1), len(ds2)
        if n1 != n2:
            # zip() stops at the shorter dataset, so say so instead of hiding it.
            print(f"Datasets have different lengths: {n1} != {n2}")

        diff = 0
        for a, b in tqdm.tqdm(zip(ds1, ds2), total=min(n1, n2)):
            if not np.array_equal(a, b, equal_nan=True):
                diff += 1

        print(f"Number of different rows: {diff}/{n1}")

    @staticmethod
    def _compare_arrays(path1, path2, title, underline, names):
        """Compare the arrays called ``names`` in the Zarr stores at ``path1`` and ``path2``."""
        print()
        print(title)
        print(underline)
        print()

        z1 = zarr.open(path1, mode="r")
        z2 = zarr.open(path2, mode="r")

        for name in names:
            missing = [label for label, store in (("dataset 1", z1), ("dataset 2", z2)) if name not in store]
            if missing:
                # Report and carry on, so one absent array does not abort the whole comparison.
                print(f"{name}: not found in {' and '.join(missing)}")
                continue

            a1 = z1[name]
            a2 = z2[name]

            if len(a1) != len(a2):
                print(f"{name}: lengths mismatch {len(a1)} != {len(a2)}")
                continue

            diff = 0
            for a, b in tqdm.tqdm(zip(a1, a2), leave=False, total=len(a1)):
                if not np.array_equal(a, b, equal_nan=True):
                    if diff == 0:
                        # np.asarray also accepts scalar rows (1-D arrays) for boolean indexing.
                        a, b = np.asarray(a), np.asarray(b)
                        mask = a != b
                        print(f"\n{name}: first different row:")
                        print(a[mask])
                        print(b[mask])

                    diff += 1

            print(f"{name}: {diff} different rows out of {len(a1)}")
103
+
104
+
105
+ command = Compare
@@ -0,0 +1,114 @@
1
+ import datetime
2
+ import logging
3
+ import time
4
+ from concurrent.futures import ProcessPoolExecutor
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from concurrent.futures import as_completed
7
+
8
+ import tqdm
9
+ from anemoi.utils.humanize import seconds_to_human
10
+
11
+ from anemoi.datasets.create.trace import enable_trace
12
+
13
+ from . import Command
14
+
15
+ LOG = logging.getLogger(__name__)
16
+
17
+
18
def task(what, options, *args, **kwargs):
    """Run the `Creator` method named `what` and return its result.

    `Creator` is imported inside this function, so the import happens wherever the
    task is executed (e.g. in a sub-process) rather than in the main process.

    `options` are the keyword arguments used to build the `Creator`; `*args` and
    `**kwargs` are forwarded to the method itself.
    """

    started = datetime.datetime.now()
    LOG.debug(f"Task {what}({args},{kwargs}) starting")

    from anemoi.datasets.create import Creator

    if "trace" in options:
        enable_trace(options["trace"])

    method = getattr(Creator(**options), what)
    outcome = method(*args, **kwargs)

    elapsed = datetime.datetime.now() - started
    LOG.debug(f"Task {what}({args},{kwargs}) completed ({elapsed})")
    return outcome
36
+
37
+
38
class Create(Command):
    """Create a dataset."""

    internal = True
    timestamp = True

    def add_arguments(self, command_parser):
        """Declare the options and positional arguments of this command."""
        add = command_parser.add_argument

        add(
            "--overwrite",
            action="store_true",
            help="Overwrite existing files. This will delete the target dataset if it already exists.",
        )
        add(
            "--test",
            action="store_true",
            help="Build a small dataset, using only the first dates. And, when possible, using low resolution and less ensemble members.",
        )
        add("config", help="Configuration yaml file defining the recipe to create the dataset.")
        add("path", help="Path to store the created data.")

        # --threads and --processes cannot be given together.
        workers = command_parser.add_mutually_exclusive_group()
        workers.add_argument("--threads", help="Use `n` parallel thread workers.", type=int, default=0)
        workers.add_argument("--processes", help="Use `n` parallel process workers.", type=int, default=0)

        add("--trace", action="store_true")

    def run(self, args):
        """Create the dataset, in parallel when workers were requested, and log the elapsed time."""
        started = time.time()
        create = self.parallel_create if args.threads + args.processes else self.serial_create
        create(args)
        LOG.info(f"Create completed in {seconds_to_human(time.time()-started)}")

    def serial_create(self, args):
        """Build a `Creator` from the parsed arguments and run its `create()` in this process."""
        from anemoi.datasets.create import Creator

        Creator(**vars(args)).create()

    def parallel_create(self, args):
        """Create the dataset with a pool of thread or process workers.

        Some modules, like fsspec, do not work well with fork(), and other
        modules may not be thread safe. The parallel loading is therefore
        driven from here through executors, and `Creator` is only imported
        inside `task`.
        """

        # Set before taking the snapshot; vars() returns the namespace's own
        # dict, so `use_threads` is part of the options sent to every task.
        args.use_threads = args.threads > 0
        options = vars(args)
        parallel = args.threads + args.processes

        ExecutorClass = ThreadPoolExecutor if args.use_threads else ProcessPoolExecutor

        # The value returned by the "init" task is used as the number of parts to load.
        with ExecutorClass(max_workers=1) as executor:
            total = executor.submit(task, "init", options).result()

        with ExecutorClass(max_workers=parallel) as executor:
            futures = [executor.submit(task, "load", options, parts=f"{n+1}/{total}") for n in range(total)]

            progress = tqdm.tqdm(
                as_completed(futures), desc="Loading", total=len(futures), colour="green", position=parallel + 1
            )
            for future in progress:
                # result() re-raises any exception raised by the worker.
                future.result()

        # The closing steps run one after the other, each waited for before the next starts.
        with ExecutorClass(max_workers=1) as executor:
            for step in ("statistics", "additions", "cleanup", "verify"):
                executor.submit(task, step, options).result()
112
+
113
+
114
+ command = Create
@@ -16,7 +16,7 @@ import numpy as np
16
16
  import semantic_version
17
17
  import tqdm
18
18
  from anemoi.utils.humanize import bytes
19
- from anemoi.utils.humanize import number
19
+ from anemoi.utils.humanize import bytes_to_human
20
20
  from anemoi.utils.humanize import when
21
21
  from anemoi.utils.text import dotted_line
22
22
  from anemoi.utils.text import progress
@@ -215,9 +215,9 @@ class Version:
215
215
  total_size, n = compute_directory_size(self.path)
216
216
 
217
217
  if total_size is not None:
218
- print(f"💽 Size : {bytes(total_size)} ({number(total_size)})")
218
+ print(f"💽 Size : {bytes(total_size)} ({bytes_to_human(total_size)})")
219
219
  if n is not None:
220
- print(f"📁 Files : {number(n)}")
220
+ print(f"📁 Files : {n:,}")
221
221
 
222
222
  @property
223
223
  def statistics(self):
@@ -7,8 +7,15 @@
7
7
  # nor does it submit to any jurisdiction.
8
8
  #
9
9
 
10
+ import logging
10
11
  import os
11
12
 
13
+ LOG = logging.getLogger(__name__)
14
+
15
+
16
+ def _ignore(*args, **kwargs):
17
+ pass
18
+
12
19
 
13
20
  class Creator:
14
21
  def __init__(
@@ -16,19 +23,21 @@ class Creator:
16
23
  path,
17
24
  config=None,
18
25
  cache=None,
19
- print=print,
26
+ use_threads=False,
20
27
  statistics_tmp=None,
21
28
  overwrite=False,
22
29
  test=None,
30
+ progress=None,
23
31
  **kwargs,
24
32
  ):
25
33
  self.path = path # Output path
26
34
  self.config = config
27
35
  self.cache = cache
28
- self.print = print
36
+ self.use_threads = use_threads
29
37
  self.statistics_tmp = statistics_tmp
30
38
  self.overwrite = overwrite
31
39
  self.test = test
40
+ self.progress = progress if progress is not None else _ignore
32
41
 
33
42
  def init(self, check_name=False):
34
43
  # check path
@@ -44,10 +53,11 @@ class Creator:
44
53
  path=self.path,
45
54
  config=self.config,
46
55
  statistics_tmp=self.statistics_tmp,
47
- print=self.print,
56
+ use_threads=self.use_threads,
57
+ progress=self.progress,
48
58
  test=self.test,
49
59
  )
50
- obj.initialise(check_name=check_name)
60
+ return obj.initialise(check_name=check_name)
51
61
 
52
62
  def load(self, parts=None):
53
63
  from .loaders import ContentLoader
@@ -56,7 +66,8 @@ class Creator:
56
66
  loader = ContentLoader.from_dataset_config(
57
67
  path=self.path,
58
68
  statistics_tmp=self.statistics_tmp,
59
- print=self.print,
69
+ use_threads=self.use_threads,
70
+ progress=self.progress,
60
71
  parts=parts,
61
72
  )
62
73
  loader.load()
@@ -66,7 +77,8 @@ class Creator:
66
77
 
67
78
  loader = StatisticsAdder.from_dataset(
68
79
  path=self.path,
69
- print=self.print,
80
+ use_threads=self.use_threads,
81
+ progress=self.progress,
70
82
  statistics_tmp=self.statistics_tmp,
71
83
  statistics_output=output,
72
84
  recompute=False,
@@ -74,20 +86,21 @@ class Creator:
74
86
  statistics_end=end,
75
87
  )
76
88
  loader.run()
89
+ assert loader.ready()
77
90
 
78
91
  def size(self):
79
92
  from .loaders import DatasetHandler
80
93
  from .size import compute_directory_sizes
81
94
 
82
95
  metadata = compute_directory_sizes(self.path)
83
- handle = DatasetHandler.from_dataset(path=self.path, print=self.print)
96
+ handle = DatasetHandler.from_dataset(path=self.path, use_threads=self.use_threads)
84
97
  handle.update_metadata(**metadata)
85
98
 
86
99
  def cleanup(self):
87
100
  from .loaders import DatasetHandlerWithStatistics
88
101
 
89
102
  cleaner = DatasetHandlerWithStatistics.from_dataset(
90
- path=self.path, print=self.print, statistics_tmp=self.statistics_tmp
103
+ path=self.path, use_threads=self.use_threads, progress=self.progress, statistics_tmp=self.statistics_tmp
91
104
  )
92
105
  cleaner.tmp_statistics.delete()
93
106
  cleaner.registry.clean()
@@ -103,15 +116,17 @@ class Creator:
103
116
  from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
104
117
 
105
118
  if statistics:
106
- a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
119
+ a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
107
120
  a.initialise()
108
121
 
109
122
  for d in delta:
110
123
  try:
111
- a = TendenciesStatisticsAddition.from_dataset(path=self.path, print=self.print, delta=d)
124
+ a = TendenciesStatisticsAddition.from_dataset(
125
+ path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
126
+ )
112
127
  a.initialise()
113
128
  except TendenciesStatisticsDeltaNotMultipleOfFrequency:
114
- self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
129
+ LOG.info(f"Skipping delta={d} as it is not a multiple of the frequency.")
115
130
 
116
131
  def run_additions(self, parts=None, delta=[1, 3, 6, 12, 24], statistics=True):
117
132
  from .loaders import StatisticsAddition
@@ -119,15 +134,17 @@ class Creator:
119
134
  from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
120
135
 
121
136
  if statistics:
122
- a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
137
+ a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
123
138
  a.run(parts)
124
139
 
125
140
  for d in delta:
126
141
  try:
127
- a = TendenciesStatisticsAddition.from_dataset(path=self.path, print=self.print, delta=d)
142
+ a = TendenciesStatisticsAddition.from_dataset(
143
+ path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
144
+ )
128
145
  a.run(parts)
129
146
  except TendenciesStatisticsDeltaNotMultipleOfFrequency:
130
- self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
147
+ LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
131
148
 
132
149
  def finalise_additions(self, delta=[1, 3, 6, 12, 24], statistics=True):
133
150
  from .loaders import StatisticsAddition
@@ -135,15 +152,17 @@ class Creator:
135
152
  from .loaders import TendenciesStatisticsDeltaNotMultipleOfFrequency
136
153
 
137
154
  if statistics:
138
- a = StatisticsAddition.from_dataset(path=self.path, print=self.print)
155
+ a = StatisticsAddition.from_dataset(path=self.path, use_threads=self.use_threads)
139
156
  a.finalise()
140
157
 
141
158
  for d in delta:
142
159
  try:
143
- a = TendenciesStatisticsAddition.from_dataset(path=self.path, print=self.print, delta=d)
160
+ a = TendenciesStatisticsAddition.from_dataset(
161
+ path=self.path, use_threads=self.use_threads, progress=self.progress, delta=d
162
+ )
144
163
  a.finalise()
145
164
  except TendenciesStatisticsDeltaNotMultipleOfFrequency:
146
- self.print(f"Skipping delta={d} as it is not a multiple of the frequency.")
165
+ LOG.debug(f"Skipping delta={d} as it is not a multiple of the frequency.")
147
166
 
148
167
  def finalise(self, **kwargs):
149
168
  self.statistics(**kwargs)
@@ -174,3 +193,10 @@ class Creator:
174
193
  return True
175
194
  except zarr.errors.PathNotFoundError:
176
195
  return False
196
+
197
+ def verify(self):
198
+ from .loaders import DatasetVerifier
199
+
200
+ handle = DatasetVerifier.from_dataset(path=self.path, use_threads=self.use_threads)
201
+
202
+ handle.verify()
@@ -56,7 +56,7 @@ class DatasetName:
56
56
  raise ValueError(self.error_message)
57
57
 
58
58
  def _parse(self, name):
59
- pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?(.*)$"
59
+ pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
60
60
  match = re.match(pattern, name)
61
61
 
62
62
  assert match, (name, pattern)
@@ -136,18 +136,19 @@ class StatisticsValueError(ValueError):
136
136
  pass
137
137
 
138
138
 
139
- def check_data_values(arr, *, name: str, log=[], allow_nan=False):
140
- if allow_nan is False:
141
- allow_nan = lambda x: False # noqa: E731
139
+ def check_data_values(arr, *, name: str, log=[], allow_nans=False):
142
140
 
143
- if allow_nan(name):
141
+ if (isinstance(allow_nans, (set, list, tuple, dict)) and name in allow_nans) or allow_nans:
144
142
  arr = arr[~np.isnan(arr)]
145
143
 
144
+ assert arr.size > 0, (name, *log)
145
+
146
146
  min, max = arr.min(), arr.max()
147
147
  assert not (np.isnan(arr).any()), (name, min, max, *log)
148
148
 
149
149
  if min == 9999.0:
150
150
  warnings.warn(f"Min value 9999 for {name}")
151
+
151
152
  if max == 9999.0:
152
153
  warnings.warn(f"Max value 9999 for {name}")
153
154
 
@@ -57,7 +57,7 @@ class ChunkFilter:
57
57
  if not parts:
58
58
  warnings.warn(f"Nothing to do for chunk {i}/{n}.")
59
59
 
60
- LOG.info(f"Running parts: {parts}")
60
+ LOG.debug(f"Running parts: {parts}")
61
61
 
62
62
  self.allowed = parts
63
63