roms-tools 1.6.2__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff compares two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- ci/environment.yml +1 -1
- roms_tools/__init__.py +1 -0
- roms_tools/_version.py +1 -1
- roms_tools/setup/boundary_forcing.py +266 -256
- roms_tools/setup/datasets.py +986 -231
- roms_tools/setup/download.py +41 -15
- roms_tools/setup/grid.py +561 -512
- roms_tools/setup/initial_conditions.py +162 -106
- roms_tools/setup/mask.py +69 -0
- roms_tools/setup/plot.py +81 -23
- roms_tools/setup/regrid.py +4 -2
- roms_tools/setup/river_forcing.py +589 -0
- roms_tools/setup/surface_forcing.py +21 -130
- roms_tools/setup/tides.py +15 -79
- roms_tools/setup/topography.py +92 -128
- roms_tools/setup/utils.py +307 -25
- roms_tools/setup/vertical_coordinate.py +5 -16
- roms_tools/tests/test_setup/test_boundary_forcing.py +10 -7
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/.zmetadata +157 -130
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/abs_time/.zattrs +1 -0
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/bry_time/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/month/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/month/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/month/0 +0 -0
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_east/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_north/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_south/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_west/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/.zmetadata +39 -12
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/abs_time/.zattrs +1 -0
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/dust/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/dust_time/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/iron/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/iron_time/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/month/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/month/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/month/0 +0 -0
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nhy/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nhy_time/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nox/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nox_time/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/pco2_air/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/pco2_air_alt/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/pco2_time/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/grid.zarr/.zattrs +0 -1
- roms_tools/tests/test_setup/test_data/grid.zarr/.zmetadata +56 -201
- roms_tools/tests/test_setup/test_data/grid.zarr/Cs_r/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/grid.zarr/Cs_w/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/grid.zarr/{interface_depth_rho → sigma_r}/.zarray +2 -6
- roms_tools/tests/test_setup/test_data/grid.zarr/sigma_r/.zattrs +7 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/sigma_r/0 +0 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/{interface_depth_u → sigma_w}/.zarray +2 -6
- roms_tools/tests/test_setup/test_data/grid.zarr/sigma_w/.zattrs +7 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/sigma_w/0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/.zattrs +1 -2
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/.zmetadata +58 -203
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/Cs_r/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/Cs_w/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/h/.zattrs +1 -1
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/h/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_coarse/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_rho/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_u/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_v/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/{grid.zarr/interface_depth_v → grid_that_straddles_dateline.zarr/sigma_r}/.zarray +2 -6
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_r/.zattrs +7 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_r/0 +0 -0
- roms_tools/tests/test_setup/test_data/{grid.zarr/layer_depth_rho → grid_that_straddles_dateline.zarr/sigma_w}/.zarray +2 -6
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_w/.zattrs +7 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_w/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/.zattrs +3 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/.zgroup +3 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/.zmetadata +214 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/abs_time/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/abs_time/.zattrs +8 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/abs_time/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/month/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/month/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/month/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_name/.zarray +24 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_name/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_name/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_time/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_time/.zattrs +8 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_time/0 +0 -0
- roms_tools/tests/test_setup/test_data/{grid.zarr/layer_depth_v → river_forcing.zarr/river_tracer}/.zarray +4 -4
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_tracer/.zattrs +10 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_tracer/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_volume/.zarray +22 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_volume/.zattrs +9 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_volume/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/{grid.zarr/layer_depth_u → river_forcing.zarr/tracer_name}/.zarray +2 -6
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/tracer_name/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/river_forcing.zarr/tracer_name/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zattrs +1 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zgroup +3 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zmetadata +185 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/abs_time/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/abs_time/.zattrs +8 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/abs_time/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_name/.zarray +24 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_name/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_name/0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_time/.zarray +20 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_time/.zattrs +7 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_time/0 +0 -0
- roms_tools/tests/test_setup/test_data/{grid_that_straddles_dateline.zarr/interface_depth_v → river_forcing_no_climatology.zarr/river_tracer}/.zarray +4 -4
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_tracer/.zattrs +10 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_tracer/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_volume/.zarray +22 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_volume/.zattrs +9 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_volume/0.0 +0 -0
- roms_tools/tests/test_setup/test_data/{grid_that_straddles_dateline.zarr/interface_depth_u → river_forcing_no_climatology.zarr/tracer_name}/.zarray +2 -6
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/tracer_name/.zattrs +6 -0
- roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/tracer_name/0 +0 -0
- roms_tools/tests/test_setup/test_grid.py +110 -12
- roms_tools/tests/test_setup/test_initial_conditions.py +2 -3
- roms_tools/tests/test_setup/test_river_forcing.py +367 -0
- roms_tools/tests/test_setup/test_surface_forcing.py +2 -24
- roms_tools/tests/test_setup/test_tides.py +2 -3
- roms_tools/tests/test_setup/test_topography.py +106 -1
- roms_tools/tests/test_setup/test_validation.py +4 -0
- roms_tools/utils.py +12 -10
- {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/LICENSE +1 -1
- {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/METADATA +6 -5
- {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/RECORD +254 -225
- {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/WHEEL +1 -1
- roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_rho/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_rho/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_u/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_u/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_v/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_v/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_rho/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_rho/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_u/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_u/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_v/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_v/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_rho/.zarray +0 -24
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_rho/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_rho/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_u/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_u/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_v/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_v/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_rho/.zarray +0 -24
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_rho/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_rho/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_u/.zarray +0 -24
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_u/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_u/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_v/.zarray +0 -24
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_v/.zattrs +0 -9
- roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_v/0.0.0 +0 -0
- roms_tools/tests/test_setup/test_vertical_coordinate.py +0 -91
- {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/top_level.txt +0 -0
roms_tools/setup/datasets.py
CHANGED
@@ -1,3 +1,4 @@
+import time
 import re
 import xarray as xr
 from dataclasses import dataclass, field
@@ -13,10 +14,17 @@ from roms_tools.setup.utils import (
     get_time_type,
     convert_cftime_to_datetime,
     one_dim_fill,
+    gc_dist,
+)
+from roms_tools.setup.download import (
+    download_correction_data,
+    download_topo,
+    download_river_data,
 )
-from roms_tools.setup.download import download_correction_data
 from roms_tools.setup.fill import LateralFill
 
+# lat-lon datasets
+
 
 @dataclass(frozen=True, kw_only=True)
 class Dataset:
@@ -32,10 +40,10 @@ class Dataset:
     end_time : Optional[datetime], optional
         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
         or no filtering is applied if start_time is not provided.
-    var_names: Dict[str, str]
-        Dictionary of variable names that are required in the dataset.
     dim_names: Dict[str, str], optional
         Dictionary specifying the names of dimensions in the dataset.
+    var_names: Dict[str, str]
+        Dictionary of variable names that are required in the dataset.
     climatology : bool
         Indicates whether the dataset is climatological. Defaults to False.
     use_dask: bool
@@ -62,7 +70,6 @@ class Dataset:
     filename: Union[str, Path, List[Union[str, Path]]]
     start_time: Optional[datetime] = None
     end_time: Optional[datetime] = None
-    var_names: Dict[str, str]
     dim_names: Dict[str, str] = field(
         default_factory=lambda: {
             "longitude": "longitude",
@@ -70,8 +77,9 @@ class Dataset:
             "time": "time",
         }
     )
+    var_names: Dict[str, str]
     climatology: Optional[bool] = False
-    use_dask: Optional[bool] =
+    use_dask: Optional[bool] = False
     apply_post_processing: Optional[bool] = True
 
     is_global: bool = field(init=False, repr=False)
@@ -114,6 +122,8 @@ class Dataset:
 
         # Make sure that latitude is ascending
         ds = self.ensure_dimension_is_ascending(ds, dim="latitude")
+        # Make sure there are no 360 degree jumps in longitude
+        ds = self.ensure_dimension_is_ascending(ds, dim="longitude")
 
         if "depth" in self.dim_names:
             # Make sure that depth is ascending
@@ -123,11 +133,6 @@ class Dataset:
 
         # Check whether the data covers the entire globe
         object.__setattr__(self, "is_global", self.check_if_global(ds))
-
-        # If dataset is global concatenate three copies of field along longitude dimension
-        if self.is_global:
-            ds = self.concatenate_longitudes(ds)
-
         object.__setattr__(self, "ds", ds)
 
         if self.apply_post_processing:
@@ -149,101 +154,7 @@ class Dataset:
             If a list of files is provided but self.dim_names["time"] is not available or use_dask=False.
         """
 
-
-        wildcard_regex = re.compile(r"[\*\?\[\]]")
-
-        # Convert Path objects to strings
-        if isinstance(self.filename, (str, Path)):
-            filename_str = str(self.filename)
-        elif isinstance(self.filename, list):
-            filename_str = [str(f) for f in self.filename]
-        else:
-            raise ValueError(
-                "filename must be a string, Path, or a list of strings/Paths."
-            )
-
-        # Handle the case when filename is a string
-        contains_wildcard = False
-        if isinstance(filename_str, str):
-            contains_wildcard = bool(wildcard_regex.search(filename_str))
-            if contains_wildcard:
-                matching_files = glob.glob(filename_str)
-                if not matching_files:
-                    raise FileNotFoundError(
-                        f"No files found matching the pattern '{filename_str}'."
-                    )
-            else:
-                matching_files = [filename_str]
-
-        # Handle the case when filename is a list
-        elif isinstance(filename_str, list):
-            contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
-            if contains_wildcard:
-                matching_files = []
-                for f in filename_str:
-                    files = glob.glob(f)
-                    if not files:
-                        raise FileNotFoundError(
-                            f"No files found matching the pattern '{f}'."
-                        )
-                    matching_files.extend(files)
-            else:
-                matching_files = filename_str
-
-        # Check if time dimension is available when multiple files are provided
-        if isinstance(filename_str, list) and "time" not in self.dim_names:
-            raise ValueError(
-                "A list of files is provided, but time dimension is not available. "
-                "A time dimension must be available to concatenate the files."
-            )
-
-        # Determine the kwargs for combining datasets
-        if contains_wildcard or len(matching_files) == 1:
-            # If there is a wildcard or just one file, use by_coords
-            kwargs = {"combine": "by_coords"}
-        else:
-            # Otherwise, use nested combine based on time
-            kwargs = {"combine": "nested", "concat_dim": self.dim_names["time"]}
-
-        # Base kwargs used for dataset combination
-        combine_kwargs = {
-            "coords": "minimal",
-            "compat": "override",
-            "combine_attrs": "override",
-        }
-
-        if self.use_dask:
-
-            chunks = {
-                self.dim_names["latitude"]: -1,
-                self.dim_names["longitude"]: -1,
-            }
-            if "depth" in self.dim_names:
-                chunks[self.dim_names["depth"]] = -1
-            if "time" in self.dim_names:
-                chunks[self.dim_names["time"]] = 1
-
-            ds = xr.open_mfdataset(
-                matching_files,
-                chunks=chunks,
-                **combine_kwargs,
-                **kwargs,
-            )
-        else:
-            ds_list = []
-            for file in matching_files:
-                ds = xr.open_dataset(file, chunks=None)
-                ds_list.append(ds)
-
-            if kwargs["combine"] == "by_coords":
-                ds = xr.combine_by_coords(ds_list, **combine_kwargs)
-            elif kwargs["combine"] == "nested":
-                ds = xr.combine_nested(
-                    ds_list, concat_dim=kwargs["concat_dim"], **combine_kwargs
-                )
-
-        if "time" in self.dim_names and self.dim_names["time"] not in ds.dims:
-            ds = ds.expand_dims(self.dim_names["time"])
+        ds = _load_data(self.filename, self.dim_names, self.use_dask)
 
         return ds
 
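The ~95 removed lines move into a module-level _load_data helper that RiverDataset reuses later in this diff. The helper's body does not appear in any hunk here; the sketch below is a condensed, hypothetical reconstruction, inferred from the removed code and from the two call sites (which pass use_dask and decode_times keywords). It is not the package's actual implementation.

import glob
from pathlib import Path

import xarray as xr


def _load_data(filename, dim_names, use_dask=False, decode_times=True):
    # Hypothetical reconstruction -- not the actual roms-tools helper.
    # Normalize to a list of strings and expand any wildcards.
    if isinstance(filename, (str, Path)):
        filename = [filename]
    files = []
    for f in filename:
        matches = glob.glob(str(f))
        if not matches:
            raise FileNotFoundError(f"No files found matching the pattern '{f}'.")
        files.extend(sorted(matches))

    if use_dask:
        # Lazy load with dask: one chunk per time step, unchunked in space.
        chunks = {dim_names["time"]: 1} if "time" in dim_names else None
        return xr.open_mfdataset(
            files,
            chunks=chunks,
            decode_times=decode_times,
            coords="minimal",
            compat="override",
            combine_attrs="override",
        )

    # Eager load without dask.
    ds_list = [xr.open_dataset(f, decode_times=decode_times) for f in files]
    if len(ds_list) == 1:
        return ds_list[0]
    return xr.combine_by_coords(ds_list, compat="override", combine_attrs="override")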
@@ -278,19 +189,8 @@ class Dataset:
         ValueError
             If the dataset does not contain the specified variables or dimensions.
         """
-        missing_vars = [
-            var for var in self.var_names.values() if var not in ds.data_vars
-        ]
-        if missing_vars:
-            raise ValueError(
-                f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
-            )
 
-
-        if missing_dims:
-            raise ValueError(
-                f"Dataset does not contain all required dimensions. The following dimensions are missing: {missing_vars}"
-            )
+        _check_dataset(ds, self.dim_names, self.var_names)
 
     def select_relevant_fields(self, ds) -> xr.Dataset:
         """Selects and returns a subset of the dataset containing only the variables
@@ -379,86 +279,10 @@ class Dataset:
         """
 
         time_dim = self.dim_names["time"]
-
-
-
-
-                        f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
-                    )
-                if not self.end_time:
-                    # Interpolate from climatology for initial conditions
-                    ds = interpolate_from_climatology(
-                        ds, self.dim_names["time"], self.start_time
-                    )
-            else:
-                time_type = get_time_type(ds[time_dim])
-                if time_type == "int":
-                    raise ValueError(
-                        "The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
-                    )
-                if time_type == "cftime":
-                    ds = ds.assign_coords(
-                        {time_dim: convert_cftime_to_datetime(ds[time_dim])}
-                    )
-                if self.end_time:
-                    end_time = self.end_time
-
-                    # Identify records before or at start_time
-                    before_start = ds[time_dim] <= np.datetime64(self.start_time)
-                    if before_start.any():
-                        closest_before_start = (
-                            ds[time_dim].where(before_start, drop=True).max()
-                        )
-                    else:
-                        logging.warning("No records found at or before the start_time.")
-                        closest_before_start = ds[time_dim].min()
-
-                    # Identify records after or at end_time
-                    after_end = ds[time_dim] >= np.datetime64(end_time)
-                    if after_end.any():
-                        closest_after_end = (
-                            ds[time_dim].where(after_end, drop=True).min()
-                        )
-                    else:
-                        logging.warning("No records found at or after the end_time.")
-                        closest_after_end = ds[time_dim].max()
-
-                    # Select records within the time range and add the closest before/after
-                    within_range = (ds[time_dim] > np.datetime64(self.start_time)) & (
-                        ds[time_dim] < np.datetime64(end_time)
-                    )
-                    selected_times = ds[time_dim].where(
-                        within_range
-                        | (ds[time_dim] == closest_before_start)
-                        | (ds[time_dim] == closest_after_end),
-                        drop=True,
-                    )
-                    ds = ds.sel({time_dim: selected_times})
-                else:
-                    # Look in time range [self.start_time, self.start_time + 24h]
-                    end_time = self.start_time + timedelta(days=1)
-                    times = (np.datetime64(self.start_time) <= ds[time_dim]) & (
-                        ds[time_dim] < np.datetime64(end_time)
-                    )
-                    if np.all(~times):
-                        raise ValueError(
-                            f"The dataset does not contain any time entries between the specified start_time: {self.start_time} "
-                            f"and {self.start_time + timedelta(hours=24)}. "
-                            "Please ensure the dataset includes time entries for that range."
-                        )
-
-                    ds = ds.where(times, drop=True)
-                    if ds.sizes[time_dim] > 1:
-                        # Pick the time closest to self.start_time
-                        ds = ds.isel({time_dim: 0})
-                    logging.info(
-                        f"Selected time entry closest to the specified start_time ({self.start_time}) within the range [{self.start_time}, {self.start_time + timedelta(hours=24)}]: {ds[time_dim].values}"
-                    )
-        else:
-            logging.warning(
-                "Dataset does not contain any time information. Please check if the time dimension "
-                "is correctly named or if the dataset includes time data."
-            )
+
+        ds = _select_relevant_times(
+            ds, time_dim, self.start_time, self.end_time, self.climatology
+        )
 
         return ds
 
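The time-selection logic likewise moves into a shared _select_relevant_times helper (RiverDataset.select_relevant_times calls it with climatology disabled later in this diff). A self-contained toy sketch of the boundary semantics the removed code implemented: records strictly inside the window are kept, plus the closest record at or before start_time and the closest at or after end_time.

import numpy as np
import xarray as xr

# Toy illustration of the window semantics mirrored by the refactor.
times = np.array(
    ["2012-01-01", "2012-01-15", "2012-02-01", "2012-02-15"],
    dtype="datetime64[ns]",
)
ds = xr.Dataset(coords={"time": times})
start, end = np.datetime64("2012-01-10"), np.datetime64("2012-02-05")

inside = (ds.time > start) & (ds.time < end)
before = ds.time.where(ds.time <= start, drop=True).max()  # closest at/before start
after = ds.time.where(ds.time >= end, drop=True).min()     # closest at/after end
keep = ds.time.where(inside | (ds.time == before) | (ds.time == after), drop=True)
print(keep.values)  # all four survive: the two inside plus the boundary neighbors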
@@ -467,7 +291,11 @@ class Dataset:
     ) -> xr.Dataset:
         """Ensure that the specified dimension in the dataset is in ascending order.
 
-
+        This function checks the order of values along the specified dimension. If they
+        are in descending order, it reverses the dimension to make it ascending. For
+        the "longitude" dimension, if it has a discontinuity (e.g., [0, 180][-180, 0]),
+        the function adjusts values to eliminate the 360-degree jump, transforming
+        the range into a continuous [0, 360) span.
 
         Parameters
         ----------
@@ -481,14 +309,23 @@ class Dataset:
         -------
         xr.Dataset
             A new `xarray.Dataset` with the specified dimension in ascending order.
-            If the dimension was already in ascending order, the original dataset is returned unchanged.
-            If the dimension was in descending order, the dataset is returned with the dimension reversed.
+            - If the dimension was already in ascending order, the original dataset is returned unchanged.
+            - If the dimension was in descending order, the dataset is returned with the dimension reversed.
+            - If the dimension is "longitude" with a discontinuity (e.g., [0, 180][-180, 0]), the values are adjusted to eliminate the 360-degree jump.
         """
-        #
+        # Check if the dimension is in descending order and reverse if needed
         diff = np.diff(ds[self.dim_names[dim]])
         if np.all(diff < 0):
             ds = ds.isel(**{self.dim_names[dim]: slice(None, None, -1)})
 
+        # Check for a discontinuity in longitude and adjust values if present
+        elif np.any(diff < 0) and dim == "longitude":
+            ds[self.dim_names[dim]] = xr.where(
+                ds[self.dim_names[dim]] < 0,
+                ds[self.dim_names[dim]] + 360,
+                ds[self.dim_names[dim]],
+            )
+
         return ds
 
     def infer_horizontal_resolution(self, ds: xr.Dataset):
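The new elif branch targets source grids whose longitude axis straddles the dateline, e.g. running [0, 180] and then jumping to [-180, 0]. A minimal toy sketch of the same xr.where adjustment:

import numpy as np
import xarray as xr

# A longitude axis that jumps from +180 back to -180 becomes a
# continuous [0, 360) axis after the adjustment.
lon = xr.DataArray(np.array([170.0, 175.0, -180.0, -175.0]), dims="lon")
lon_fixed = xr.where(lon < 0, lon + 360, lon)
print(lon_fixed.values)  # [170. 175. 180. 185.]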
@@ -542,43 +379,68 @@ class Dataset:
 
         return is_global
 
-    def concatenate_longitudes(self, ds):
-        """
-        Concatenates the field three times: with longitudes shifted by -360, original longitudes, and shifted by +360.
+    def concatenate_longitudes(self, ds, end="upper", verbose=False):
+        """Concatenates fields in dataset twice along the longitude dimension.
 
         Parameters
         ----------
-
-        The
+        ds: xr.Dataset
+            The dataset to be concatenated. The longitude dimension must be present in this dataset.
+        end : str, optional
+            Specifies which end to shift the longitudes.
+            Options are:
+                - "lower": shifts longitudes by -360 degrees and concatenates to the lower end.
+                - "upper": shifts longitudes by +360 degrees and concatenates to the upper end.
+                - "both": shifts longitudes by -360 degrees and 360 degrees and concatenates to both ends.
+            Default is "upper".
+        verbose : bool, optional
+            If True, print message if dataset is concatenated along longitude dimension.
+            Defaults to False.
 
         Returns
         -------
-        xr.
-        The concatenated
+        ds_concatenated : xr.Dataset
+            The concatenated dataset.
+        """
 
-
-
-        Concatenating three times may be overkill in most situations, but it is safe. Alternatively, we could refactor
-        to figure out whether concatenating on the lower end, upper end, or at all is needed.
+        if verbose:
+            start_time = time.time()
 
-        """
         ds_concatenated = xr.Dataset()
 
         lon = ds[self.dim_names["longitude"]]
-
-
-
-
-
+        if end == "lower":
+            lon_minus360 = lon - 360
+            lon_concatenated = xr.concat(
+                [lon_minus360, lon], dim=self.dim_names["longitude"]
+            )
 
-
+        elif end == "upper":
+            lon_plus360 = lon + 360
+            lon_concatenated = xr.concat(
+                [lon, lon_plus360], dim=self.dim_names["longitude"]
+            )
+
+        elif end == "both":
+            lon_minus360 = lon - 360
+            lon_plus360 = lon + 360
+            lon_concatenated = xr.concat(
+                [lon_minus360, lon, lon_plus360], dim=self.dim_names["longitude"]
+            )
 
-        for var in
+        for var in ds.data_vars:
             if self.dim_names["longitude"] in ds[var].dims:
                 field = ds[var]
-
-
-
+
+                if end == "both":
+                    field_concatenated = xr.concat(
+                        [field, field, field], dim=self.dim_names["longitude"]
+                    )
+                else:
+                    field_concatenated = xr.concat(
+                        [field, field], dim=self.dim_names["longitude"]
+                    )
+
                 if self.use_dask:
                     field_concatenated = field_concatenated.chunk(
                         {self.dim_names["longitude"]: -1}
@@ -588,6 +450,13 @@ class Dataset:
             else:
                 ds_concatenated[var] = ds[var]
 
+        ds_concatenated[self.dim_names["longitude"]] = lon_concatenated
+
+        if verbose:
+            logging.info(
+                f"Concatenating the data along the longitude dimension: {time.time() - start_time:.3f} seconds"
+            )
+
         return ds_concatenated
 
     def post_process(self):
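Together, these two hunks make concatenate_longitudes append one shifted copy of each field (or two, for end="both") instead of always concatenating three copies. A self-contained toy sketch of the end="upper" case:

import numpy as np
import xarray as xr

# The field is repeated once and the longitude coordinate of the copy
# is shifted by +360 degrees, exactly as in the rewritten method.
lon = xr.DataArray(np.arange(0.0, 360.0, 90.0), dims="lon")
field = xr.DataArray(np.random.rand(4), dims="lon", coords={"lon": lon})
lon_concat = xr.concat([lon, lon + 360], dim="lon")
field_concat = xr.concat([field, field], dim="lon").assign_coords(lon=lon_concat)
print(field_concat.lon.values)  # [  0.  90. 180. 270. 360. 450. 540. 630.]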
@@ -601,7 +470,9 @@ class Dataset:
         """
         pass
 
-    def choose_subdomain(
+    def choose_subdomain(
+        self, target_coords, buffer_points=20, return_copy=False, verbose=False
+    ):
         """Selects a subdomain from the xarray Dataset based on specified target
         coordinates, extending the selection by a defined buffer. Adjusts longitude
         ranges as necessary to accommodate the dataset's expected range and handles
@@ -618,6 +489,9 @@ class Dataset:
         return_subdomain : bool, optional
             If True, returns the subset of the original dataset representing the chosen
             subdomain. If False, assigns the subset to `self.ds`. Defaults to False.
+        verbose : bool, optional
+            If True, print message if dataset is concatenated along longitude dimension.
+            Defaults to False.
 
         Returns
         -------
@@ -640,9 +514,43 @@ class Dataset:
 
         margin = self.resolution * buffer_points
 
-        if
+        # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is necessary)
+        subdomain = self.ds.sel(
+            **{
+                self.dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
+            }
+        )
+        lon = subdomain[self.dim_names["longitude"]]
+
+        if self.is_global:
+            # Concatenate only if necessary
+            if lon_max + margin > lon.max():
+                # See if shifting by +360 degrees helps
+                if (lon_min - margin > (lon + 360).min()) and (
+                    lon_max + margin < (lon + 360).max()
+                ):
+                    subdomain[self.dim_names["longitude"]] = lon + 360
+                    lon = subdomain[self.dim_names["longitude"]]
+                else:
+                    subdomain = self.concatenate_longitudes(
+                        subdomain, end="upper", verbose=verbose
+                    )
+                    lon = subdomain[self.dim_names["longitude"]]
+            if lon_min - margin < lon.min():
+                # See if shifting by -360 degrees helps
+                if (lon_min - margin > (lon - 360).min()) and (
+                    lon_max + margin < (lon - 360).max()
+                ):
+                    subdomain[self.dim_names["longitude"]] = lon - 360
+                    lon = subdomain[self.dim_names["longitude"]]
+                else:
+                    subdomain = self.concatenate_longitudes(
+                        subdomain, end="lower", verbose=verbose
+                    )
+                    lon = subdomain[self.dim_names["longitude"]]
+
+        else:
             # Adjust longitude range if needed to match the expected range
-            lon = self.ds[self.dim_names["longitude"]]
             if not target_coords["straddle"]:
                 if lon.min() < -180:
                     if lon_max + margin > 0:
@@ -662,12 +570,9 @@ class Dataset:
                 if lon_min - margin < 0:
                     lon_min += 360
                     lon_max += 360
-
-
-
-        subdomain = self.ds.sel(
+        # Select the subdomain in longitude direction
+        subdomain = subdomain.sel(
             **{
-                self.dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
                 self.dim_names["longitude"]: slice(lon_min - margin, lon_max + margin),
             }
         )
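choose_subdomain now slices latitude first and only shifts or concatenates longitudes when the buffered target range actually falls outside the source axis. A toy sketch of the buffered selection itself, with an assumed 1-degree source grid:

import numpy as np
import xarray as xr

# The target latitude range is widened by buffer_points * resolution
# degrees on each side before slicing, mirroring the margin logic above.
ds = xr.Dataset(
    {"topo": ("lat", np.zeros(181))},
    coords={"lat": np.arange(-90.0, 90.5, 1.0)},
)
resolution, buffer_points = 1.0, 20       # assumed 1-degree source grid
margin = resolution * buffer_points
lat_min, lat_max = 30.0, 45.0
subdomain = ds.sel(lat=slice(lat_min - margin, lat_max + margin))
print(subdomain.lat.values[[0, -1]])      # [10. 65.]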
@@ -1522,3 +1427,853 @@ class ERA5Correction(Dataset):
                 "The correction dataset does not contain all specified longitude values."
             )
         object.__setattr__(self, "ds", subdomain)
+
+
+@dataclass(frozen=True, kw_only=True)
+class ETOPO5Dataset(Dataset):
+    """Represents topography data on the original grid from the ETOPO5 dataset.
+
+    Parameters
+    ----------
+    filename : str, optional
+        The path to the ETOPO5 dataset file. If not provided, the dataset will be downloaded
+        automatically via the `pooch` library.
+    var_names : Dict[str, str], optional
+        Dictionary of variable names required in the dataset. Defaults to:
+        {
+            "topo": "topo",
+        }
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset. Defaults to:
+        {"longitude": "lon", "latitude": "lat"}.
+
+    Attributes
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset containing the ETOPO5 data, loaded from the specified file.
+    """
+
+    filename: str = field(default_factory=lambda: download_topo("etopo5.nc"))
+    var_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "topo": "topo",
+        }
+    )
+    dim_names: Dict[str, str] = field(
+        default_factory=lambda: {"longitude": "lon", "latitude": "lat"}
+    )
+    ds: xr.Dataset = field(init=False, repr=False)
+
+    def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
+        """Assign lat and lon as coordinates.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The input dataset.
+
+        Returns
+        -------
+        ds : xr.Dataset
+            A cleaned `xarray.Dataset` with updated coordinates.
+        """
+        ds = ds.assign_coords(
+            {
+                "lon": ds["topo_lon"],
+                "lat": ds["topo_lat"],
+            }
+        )
+        return ds
+
+
+@dataclass(frozen=True, kw_only=True)
+class SRTM15Dataset(Dataset):
+    """Represents topography data on the original grid from the SRTM15 dataset.
+
+    Parameters
+    ----------
+    filename : str
+        The path to the SRTM15 dataset file.
+    var_names : Dict[str, str], optional
+        Dictionary of variable names required in the dataset. Defaults to:
+        {
+            "topo": "z",
+        }
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset. Defaults to:
+        {"longitude": "lon", "latitude": "lat"}.
+
+    Attributes
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset containing the SRTM15 data, loaded from the specified file.
+    """
+
+    filename: str
+    var_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "topo": "z",
+        }
+    )
+    dim_names: Dict[str, str] = field(
+        default_factory=lambda: {"longitude": "lon", "latitude": "lat"}
+    )
+    ds: xr.Dataset = field(init=False, repr=False)
+
+
+# river datasets
+@dataclass(frozen=True, kw_only=True)
+class RiverDataset:
+    """Represents river data.
+
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    start_time : datetime
+        The start time for selecting relevant data.
+    end_time : datetime
+        The end time for selecting relevant data.
+    dim_names: Dict[str, str]
+        Dictionary specifying the names of dimensions in the dataset.
+        Requires "station" and "time" as keys.
+    var_names: Dict[str, str]
+        Dictionary of variable names that are required in the dataset.
+        Requires the keys "latitude", "longitude", "flux", "ratio", and "name".
+    opt_var_names: Dict[str, str], optional
+        Dictionary of variable names that are optional in the dataset.
+        Defaults to an empty dictionary.
+    climatology : bool
+        Indicates whether the dataset is climatological. Defaults to False.
+
+    Attributes
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset containing the forcing data on its original grid.
+    """
+
+    filename: Union[str, Path, List[Union[str, Path]]]
+    start_time: datetime
+    end_time: datetime
+    dim_names: Dict[str, str]
+    var_names: Dict[str, str]
+    opt_var_names: Optional[Dict[str, str]] = field(default_factory=dict)
+    climatology: Optional[bool] = False
+    ds: xr.Dataset = field(init=False, repr=False)
+
+    def __post_init__(self):
+
+        # Validate start_time and end_time
+        if not isinstance(self.start_time, datetime):
+            raise TypeError(
+                f"start_time must be a datetime object, but got {type(self.start_time).__name__}."
+            )
+        if not isinstance(self.end_time, datetime):
+            raise TypeError(
+                f"end_time must be a datetime object, but got {type(self.end_time).__name__}."
+            )
+
+        ds = self.load_data()
+        ds = self.clean_up(ds)
+        self.check_dataset(ds)
+
+        # Select relevant times
+        ds = self.add_time_info(ds)
+        object.__setattr__(self, "ds", ds)
+
+    def load_data(self) -> xr.Dataset:
+        """Load dataset from the specified file.
+
+        Returns
+        -------
+        ds : xr.Dataset
+            The loaded xarray Dataset containing the forcing data.
+        """
+        ds = _load_data(
+            self.filename, self.dim_names, use_dask=False, decode_times=False
+        )
+
+        return ds
+
+    def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
+        """Decodes the 'name' variable (if byte-encoded) and updates the dataset.
+
+        This method checks if the 'name' variable is of dtype 'object' (i.e., byte-encoded),
+        and if so, decodes each byte array to a string and updates the dataset.
+        It also ensures that the 'station' dimension is of integer type.
+
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The dataset containing the 'name' variable to decode.
+
+        Returns
+        -------
+        ds : xr.Dataset
+            The dataset with the decoded 'name' variable.
+        """
+
+        if ds[self.var_names["name"]].dtype == "object":
+            names = []
+            for i in range(len(ds[self.dim_names["station"]])):
+                byte_array = ds[self.var_names["name"]].isel(
+                    **{self.dim_names["station"]: i}
+                )
+                name = decode_string(byte_array)
+                names.append(name)
+            ds[self.var_names["name"]] = xr.DataArray(
+                data=names, dims=self.dim_names["station"]
+            )
+
+        if ds[self.dim_names["station"]].dtype == "float64":
+            ds[self.dim_names["station"]] = ds[self.dim_names["station"]].astype(int)
+
+        # Drop all variables that have chars dim
+        vars_to_drop = ["ocn_name", "stn_name", "ct_name", "cn_name", "chars"]
+        existing_vars = [var for var in vars_to_drop if var in ds]
+        ds = ds.drop_vars(existing_vars)
+
+        return ds
+
+    def check_dataset(self, ds: xr.Dataset) -> None:
+        """Check if the dataset contains the specified variables and dimensions.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The xarray Dataset to check.
+
+        Raises
+        ------
+        ValueError
+            If the dataset does not contain the specified variables or dimensions.
+        """
+
+        _check_dataset(ds, self.dim_names, self.var_names, self.opt_var_names)
+
+    def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
+        """Dummy method to be overridden by child classes to add time information to the
+        dataset.
+
+        This method is intended as a placeholder and should be implemented in subclasses
+        to provide specific functionality for adding time-related information to the dataset.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The xarray Dataset to which time information will be added.
+
+        Returns
+        -------
+        xr.Dataset
+            The xarray Dataset with time information added (as implemented by child classes).
+        """
+        return ds
+
+    def select_relevant_times(self, ds) -> xr.Dataset:
+        """Select a subset of the dataset based on the specified time range.
+
+        This method filters the dataset to include all records between `start_time` and `end_time`.
+        Additionally, it ensures that one record at or before `start_time` and one record at or
+        after `end_time` are included, even if they fall outside the strict time range.
+
+        If no `end_time` is specified, the method will select the time range of
+        [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The input dataset to be filtered. Must contain a time dimension.
+
+        Returns
+        -------
+        xr.Dataset
+            A dataset filtered to the specified time range, including the closest entries
+            at or before `start_time` and at or after `end_time` if applicable.
+
+        Warns
+        -----
+        UserWarning
+            If no records at or before `start_time` or no records at or after `end_time` are found.
+
+        UserWarning
+            If the dataset does not contain any time dimension or the time dimension is incorrectly named.
+        """
+
+        time_dim = self.dim_names["time"]
+
+        ds = _select_relevant_times(ds, time_dim, self.start_time, self.end_time, False)
+
+        return ds
+
+    def compute_climatology(self):
+        logging.info("Compute climatology for river forcing.")
|
|
1713
|
+
|
|
1714
|
+
time_dim = self.dim_names["time"]
|
|
1715
|
+
|
|
1716
|
+
flux = self.ds[self.var_names["flux"]].groupby(f"{time_dim}.month").mean()
|
|
1717
|
+
self.ds[self.var_names["flux"]] = flux
|
|
1718
|
+
|
|
1719
|
+
ds = assign_dates_to_climatology(self.ds, "month")
|
|
1720
|
+
ds = ds.swap_dims({"month": "time"})
|
|
1721
|
+
object.__setattr__(self, "ds", ds)
|
|
1722
|
+
|
|
1723
|
+
updated_dim_names = {**self.dim_names}
|
|
1724
|
+
updated_dim_names["time"] = "time"
|
|
1725
|
+
object.__setattr__(self, "dim_names", updated_dim_names)
|
|
1726
|
+
|
|
1727
|
+
object.__setattr__(self, "climatology", True)
|
|
1728
|
+
|
|
1729
|
+
def sort_by_river_volume(self, ds: xr.Dataset) -> xr.Dataset:
|
|
1730
|
+
"""Sorts the dataset by river volume in descending order (largest rivers first),
|
|
1731
|
+
if the volume variable is available.
|
|
1732
|
+
|
|
1733
|
+
This method uses the river volume to reorder the dataset such that the rivers with
|
|
1734
|
+
the largest volumes come first in the `station` dimension. If the volume variable
|
|
1735
|
+
is not present in the dataset, a warning is logged.
|
|
1736
|
+
|
|
1737
|
+
Parameters
|
|
1738
|
+
----------
|
|
1739
|
+
ds : xr.Dataset
|
|
1740
|
+
The xarray Dataset containing the river data to be sorted by volume.
|
|
1741
|
+
|
|
1742
|
+
Returns
|
|
1743
|
+
-------
|
|
1744
|
+
xr.Dataset
|
|
1745
|
+
The dataset with rivers sorted by their volume in descending order.
|
|
1746
|
+
If the volume variable is not available, the original dataset is returned.
|
|
1747
|
+
"""
|
|
1748
|
+
|
|
1749
|
+
if "vol" in self.opt_var_names:
|
|
1750
|
+
volume_values = ds[self.opt_var_names["vol"]].values
|
|
1751
|
+
if isinstance(volume_values, np.ndarray):
|
|
1752
|
+
# Check if all volume values are the same
|
|
1753
|
+
if np.all(volume_values == volume_values[0]):
|
|
1754
|
+
# If all volumes are the same, no need to reverse order
|
|
1755
|
+
sorted_indices = np.argsort(
|
|
1756
|
+
volume_values
|
|
1757
|
+
) # Sort in ascending order
|
|
1758
|
+
else:
|
|
1759
|
+
# If volumes differ, reverse order for descending sort
|
|
1760
|
+
sorted_indices = np.argsort(volume_values)[
|
|
1761
|
+
::-1
|
|
1762
|
+
] # Reverse for descending order
|
|
1763
|
+
|
|
1764
|
+
ds = ds.isel(**{self.dim_names["station"]: sorted_indices})
|
|
1765
|
+
|
|
1766
|
+
else:
|
|
1767
|
+
logging.warning("The volume data is not in a valid array format.")
|
|
1768
|
+
else:
|
|
1769
|
+
logging.warning(
|
|
1770
|
+
"Cannot sort rivers by volume. 'vol' is missing in the variable names."
|
|
1771
|
+
)
|
|
1772
|
+
|
|
1773
|
+
return ds
|
|
1774
|
+
|
|
1775
|
+
def extract_relevant_rivers(self, target_coords, dx):
|
|
1776
|
+
"""Extracts a subset of the dataset based on the proximity of river mouths to
|
|
1777
|
+
target coordinates.
|
|
1778
|
+
|
|
1779
|
+
This method calculates the distance between each river mouth and the provided target coordinates
|
|
1780
|
+
(latitude and longitude) using the `gc_dist` function. It then filters the dataset to include only those
|
|
1781
|
+
river stations whose minimum distance from the target is less than a specified threshold distance (`dx`).
|
|
1782
|
+
|
|
1783
|
+
Parameters
|
|
1784
|
+
----------
|
|
1785
|
+
target_coords : dict
|
|
1786
|
+
A dictionary containing the target coordinates for the comparison. It should include:
|
|
1787
|
+
- "lon" (float): The target longitude in degrees.
|
|
1788
|
+
- "lat" (float): The target latitude in degrees.
|
|
1789
|
+
- "straddle" (bool): A flag indicating whether to adjust the longitudes for stations that cross the
|
|
1790
|
+
International Date Line. If `True`, longitudes greater than 180 degrees are adjusted by subtracting 360,
|
|
1791
|
+
otherwise, negative longitudes are adjusted by adding 360.
|
|
1792
|
+
|
|
1793
|
+
dx : float
|
|
1794
|
+
The maximum distance threshold (in meters) for including a river station. Only river mouths that are
|
|
1795
|
+
within `dx` meters from the target coordinates will be included in the returned dataset.
|
|
1796
|
+
|
|
1797
|
+
Returns
|
|
1798
|
+
-------
|
|
1799
|
+
indices : dict
|
|
1800
|
+
A dictionary containing the indices of the rivers that are within the threshold distance from
|
|
1801
|
+
the target coordinates. The dictionary keys are:
|
|
1802
|
+
- "station" : numpy.ndarray
|
|
1803
|
+
The indices of the rivers that satisfy the distance threshold.
|
|
1804
|
+
- "eta_rho" : numpy.ndarray
|
|
1805
|
+
The indices of the `eta_rho` dimension corresponding to the selected stations.
|
|
1806
|
+
- "xi_rho" : numpy.ndarray
|
|
1807
|
+
The indices of the `xi_rho` dimension corresponding to the selected stations.
|
|
1808
|
+
"""
|
|
1809
|
+
|
|
1810
|
+
# Retrieve longitude and latitude of river mouths
|
|
1811
|
+
river_lon = self.ds[self.var_names["longitude"]]
|
|
1812
|
+
river_lat = self.ds[self.var_names["latitude"]]
|
|
1813
|
+
|
|
1814
|
+
# Adjust longitude based on whether it crosses the International Date Line (straddle case)
|
|
1815
|
+
if target_coords["straddle"]:
|
|
1816
|
+
river_lon = xr.where(river_lon > 180, river_lon - 360, river_lon)
|
|
1817
|
+
else:
|
|
1818
|
+
river_lon = xr.where(river_lon < 0, river_lon + 360, river_lon)
|
|
1819
|
+
|
|
1820
|
+
# Calculate the distance between the target coordinates and each river mouth
|
|
1821
|
+
dist = gc_dist(target_coords["lon"], target_coords["lat"], river_lon, river_lat)
|
|
1822
|
+
dist_min = dist.min(dim=["eta_rho", "xi_rho"])
|
|
1823
|
+
# Filter the dataset to include only stations within the distance threshold
|
|
1824
|
+
if (dist_min < dx).any():
|
|
1825
|
+
ds = self.ds.where(dist_min < dx, drop=True)
|
|
1826
|
+
ds = self.sort_by_river_volume(ds)
|
|
1827
|
+
dist = dist.where(dist_min < dx, drop=True).transpose(
|
|
1828
|
+
self.dim_names["station"], "eta_rho", "xi_rho"
|
|
1829
|
+
)
|
|
1830
|
+
dist_min = dist_min.where(dist_min < dx, drop=True)
|
|
1831
|
+
|
|
1832
|
+
# Find the indices of the closest grid cell to the river mouth
|
|
1833
|
+
indices = np.where(dist == dist_min)
|
|
1834
|
+
names = (
|
|
1835
|
+
self.ds[self.var_names["name"]]
|
|
1836
|
+
.isel({self.dim_names["station"]: indices[0]})
|
|
1837
|
+
.values
|
|
1838
|
+
)
|
|
1839
|
+
# Return the indices in a dictionary format
|
|
1840
|
+
indices = {
|
|
1841
|
+
"station": indices[0],
|
|
1842
|
+
"eta_rho": indices[1],
|
|
1843
|
+
"xi_rho": indices[2],
|
|
1844
|
+
"name": names,
|
|
1845
|
+
}
|
|
1846
|
+
else:
|
|
1847
|
+
ds = xr.Dataset()
|
|
1848
|
+
indices = {
|
|
1849
|
+
"station": [],
|
|
1850
|
+
"eta_rho": [],
|
|
1851
|
+
"xi_rho": [],
|
|
1852
|
+
"name": [],
|
|
1853
|
+
}
|
|
1854
|
+
|
|
1855
|
+
object.__setattr__(self, "ds", ds)
|
|
1856
|
+
|
|
1857
|
+
return indices
|
|
1858
|
+
|
|
1859
|
+
|
|
1860
|
+
@dataclass(frozen=True, kw_only=True)
|
|
1861
|
+
class DaiRiverDataset(RiverDataset):
|
|
1862
|
+
"""Represents river data from the Dai river dataset.
|
|
1863
|
+
|
|
1864
|
+
Parameters
|
|
1865
|
+
----------
|
|
1866
|
+
filename : Union[str, Path, List[Union[str, Path]]], optional
|
|
1867
|
+
The path to the Dai River dataset file. If not provided, the dataset will be downloaded
|
|
1868
|
+
automatically via the `pooch` library.
|
|
1869
|
+
start_time : datetime
|
|
1870
|
+
The start time for selecting relevant data.
|
|
1871
|
+
end_time : datetime
|
|
1872
|
+
The end time for selecting relevant data.
|
|
1873
|
+
dim_names: Dict[str, str], optional
|
|
1874
|
+
Dictionary specifying the names of dimensions in the dataset.
|
|
1875
|
+
var_names: Dict[str, str], optional
|
|
1876
|
+
Dictionary of variable names that are required in the dataset.
|
|
1877
|
+
opt_var_names: Dict[str, str], optional
|
|
1878
|
+
Dictionary of variable names that are optional in the dataset.
|
|
1879
|
+
climatology : bool
|
|
1880
|
+
Indicates whether the dataset is climatological. Defaults to False.
|
|
1881
|
+
|
|
1882
|
+
Attributes
|
|
1883
|
+
----------
|
|
1884
|
+
ds : xr.Dataset
|
|
1885
|
+
The xarray Dataset containing the forcing data on its original grid.
|
|
1886
|
+
"""
|
|
1887
|
+
|
|
1888
|
+
filename: Union[str, Path, List[Union[str, Path]]] = field(
|
|
1889
|
+
default_factory=lambda: download_river_data("dai_trenberth_may2019.nc")
|
|
1890
|
+
)
|
|
1891
|
+
start_time: datetime
|
|
1892
|
+
end_time: datetime
|
|
1893
|
+
dim_names: Dict[str, str] = field(
|
|
1894
|
+
default_factory=lambda: {
|
|
1895
|
+
"station": "station",
|
|
1896
|
+
"time": "time",
|
|
1897
|
+
}
|
|
1898
|
+
)
|
|
1899
|
+
var_names: Dict[str, str] = field(
|
|
1900
|
+
default_factory=lambda: {
|
|
1901
|
+
"latitude": "lat_mou",
|
|
1902
|
+
"longitude": "lon_mou",
|
|
1903
|
+
"flux": "FLOW",
|
|
1904
|
+
"ratio": "ratio_m2s",
|
|
1905
|
+
"name": "riv_name",
|
|
1906
|
+
}
|
|
1907
|
+
)
|
|
1908
|
+
opt_var_names: Dict[str, str] = field(
|
|
1909
|
+
default_factory=lambda: {
|
|
1910
|
+
"vol": "vol_stn",
|
|
1911
|
+
}
|
|
1912
|
+
)
|
|
1913
|
+
climatology: Optional[bool] = False
|
|
1914
|
+
ds: xr.Dataset = field(init=False, repr=False)
|
|
1915
|
+
|
|
1916
|
+
def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
|
|
1917
|
+
"""Adds time information to the dataset based on the climatology flag and
|
|
1918
|
+
dimension names.
|
|
1919
|
+
|
|
1920
|
+
This method processes the dataset to include time information according to the climatology
|
|
1921
|
+
setting. If the dataset represents climatology data and the time dimension is labeled as
|
|
1922
|
+
"month", it assigns dates to the dataset based on a monthly climatology. Additionally, it
|
|
1923
|
+
handles dimension name updates if necessary.
|
|
1924
|
+
|
|
1925
|
+
Parameters
|
|
1926
|
+
----------
|
|
1927
|
+
ds : xr.Dataset
|
|
1928
|
+
The input dataset to which time information will be added.
|
|
1929
|
+
|
|
1930
|
+
Returns
|
|
1931
|
+
-------
|
|
1932
|
+
xr.Dataset
|
|
1933
|
+
The dataset with time information added, including adjustments for climatology and
|
|
1934
|
+
dimension names.
|
|
1935
|
+
"""
|
|
1936
|
+
time_dim = self.dim_names["time"]
|
|
1937
|
+
|
|
1938
|
+
# Extract the 'time' variable as a numpy array
|
|
1939
|
+
time_vals = ds[time_dim].values
|
|
1940
|
+
|
|
1941
|
+
# Handle rounding of the time values
|
|
1942
|
+
year = np.round(time_vals * 1e-2).astype(int)
|
|
1943
|
+
month = np.round((time_vals * 1e-2 - year) * 1e2).astype(int)
|
|
1944
|
+
|
|
1945
|
+
# Convert to datetime (assuming the day is always 15th for this example)
|
|
1946
|
+
dates = [datetime(year=i, month=m, day=15) for i, m in zip(year, month)]
|
|
1947
|
+
|
|
1948
|
+
ds[time_dim] = dates
|
|
1949
|
+
|
|
1950
|
+
return ds
|
|
1951
|
+
|
|
1952
|
+
|
|
1953
|
+
# shared functions
|
|
1954
|
+
|
|
1955
|
+
|
|
1956
|
+
def _load_data(filename, dim_names, use_dask, decode_times=True):
|
|
1957
|
+
"""Load dataset from the specified file.
|
|
1958
|
+
|
|
1959
|
+
Parameters
|
|
1960
|
+
----------
|
|
1961
|
+
filename : Union[str, Path, List[Union[str, Path]]]
|
|
1962
|
+
The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
|
|
1963
|
+
or a list of strings or Path objects containing multiple files.
|
|
1964
|
+
dim_names: Dict[str, str], optional
|
|
1965
|
+
Dictionary specifying the names of dimensions in the dataset.
|
|
1966
|
+
use_dask: bool
|
|
1967
|
+
Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
|
|
1968
|
+
decode_times: bool, optional
|
|
1969
|
+
If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
|
|
1970
|
+
Defaults to True.
|
|
1971
|
+
|
|
1972
|
+
Returns
|
|
1973
|
+
-------
|
|
1974
|
+
ds : xr.Dataset
|
|
1975
|
+
The loaded xarray Dataset containing the forcing data.
|
|
1976
|
+
|
|
1977
|
+
Raises
|
|
1978
|
+
------
|
|
1979
|
+
FileNotFoundError
|
|
1980
|
+
If the specified file does not exist.
|
|
1981
|
+
ValueError
|
|
1982
|
+
If a list of files is provided but dim_names["time"] is not available or use_dask=False.
|
|
1983
|
+
"""
|
|
1984
|
+
|
|
1985
|
+
# Precompile the regex for matching wildcard characters
|
|
1986
|
+
wildcard_regex = re.compile(r"[\*\?\[\]]")
|
|
1987
|
+
|
|
1988
|
+
# Convert Path objects to strings
|
|
1989
|
+
if isinstance(filename, (str, Path)):
|
|
1990
|
+
filename_str = str(filename)
|
|
1991
|
+
elif isinstance(filename, list):
|
|
1992
|
+
filename_str = [str(f) for f in filename]
|
|
1993
|
+
else:
|
|
1994
|
+
raise ValueError("filename must be a string, Path, or a list of strings/Paths.")
|
|
1995
|
+
# Handle the case when filename is a string
|
|
1996
|
+
contains_wildcard = False
|
|
1997
|
+
if isinstance(filename_str, str):
|
|
1998
|
+
contains_wildcard = bool(wildcard_regex.search(filename_str))
|
|
1999
|
+
if contains_wildcard:
|
|
2000
|
+
matching_files = glob.glob(filename_str)
|
|
2001
|
+
if not matching_files:
|
|
2002
|
+
raise FileNotFoundError(
|
|
2003
|
+
f"No files found matching the pattern '{filename_str}'."
|
|
2004
|
+
)
|
|
2005
|
+
else:
|
|
2006
|
+
matching_files = [filename_str]
|
|
2007
|
+
|
|
2008
|
+
# Handle the case when filename is a list
|
|
2009
|
+
elif isinstance(filename_str, list):
|
|
2010
|
+
contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
|
|
2011
|
+
if contains_wildcard:
|
|
2012
|
+
matching_files = []
|
|
2013
|
+
for f in filename_str:
|
|
2014
|
+
files = glob.glob(f)
|
|
2015
|
+
if not files:
|
|
2016
|
+
raise FileNotFoundError(
|
|
2017
|
+
f"No files found matching the pattern '{f}'."
|
|
2018
|
+
)
|
|
2019
|
+
matching_files.extend(files)
|
|
2020
|
+
else:
|
|
2021
|
+
matching_files = filename_str
|
|
2022
|
+
|
|
2023
|
+
# Check if time dimension is available when multiple files are provided
|
|
2024
|
+
if isinstance(filename_str, list) and "time" not in dim_names:
|
|
2025
|
+
raise ValueError(
|
|
2026
|
+
"A list of files is provided, but time dimension is not available. "
|
|
2027
|
+
"A time dimension must be available to concatenate the files."
|
|
2028
|
+
)
|
|
2029
|
+
|
|
2030
|
+
# Determine the kwargs for combining datasets
|
|
2031
|
+
if contains_wildcard or len(matching_files) == 1:
|
|
2032
|
+
# If there is a wildcard or just one file, use by_coords
|
|
2033
|
+
kwargs = {"combine": "by_coords"}
|
|
2034
|
+
else:
|
|
2035
|
+
# Otherwise, use nested combine based on time
|
|
2036
|
+
kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
|
|
2037
|
+
|
|
2038
|
+
# Base kwargs used for dataset combination
|
|
2039
|
+
combine_kwargs = {
|
|
2040
|
+
"coords": "minimal",
|
|
2041
|
+
"compat": "override",
|
|
2042
|
+
"combine_attrs": "override",
|
|
2043
|
+
}
|
|
2044
|
+
|
|
2045
|
+
if use_dask:
|
|
2046
|
+
|
|
2047
|
+
chunks = {
|
|
2048
|
+
dim_names["latitude"]: -1,
|
|
2049
|
+
dim_names["longitude"]: -1,
|
|
2050
|
+
}
|
|
2051
|
+
if "depth" in dim_names:
|
|
2052
|
+
chunks[dim_names["depth"]] = -1
|
|
2053
|
+
if "time" in dim_names:
|
|
2054
|
+
chunks[dim_names["time"]] = 1
|
|
2055
|
+
|
|
2056
|
+
ds = xr.open_mfdataset(
|
|
2057
|
+
matching_files,
|
|
2058
|
+
decode_times=decode_times,
|
|
2059
|
+
chunks=chunks,
|
|
2060
|
+
**combine_kwargs,
|
|
2061
|
+
**kwargs,
|
|
2062
|
+
)
|
|
2063
|
+
else:
|
|
2064
|
+
ds_list = []
|
|
2065
|
+
for file in matching_files:
|
|
2066
|
+
ds = xr.open_dataset(file, decode_times=decode_times, chunks=None)
|
|
2067
|
+
ds_list.append(ds)
|
|
2068
|
+
|
|
2069
|
+
if kwargs["combine"] == "by_coords":
|
|
2070
|
+
ds = xr.combine_by_coords(ds_list, **combine_kwargs)
|
|
2071
|
+
elif kwargs["combine"] == "nested":
|
|
2072
|
+
ds = xr.combine_nested(
|
|
2073
|
+
ds_list, concat_dim=kwargs["concat_dim"], **combine_kwargs
|
|
2074
|
+
)
|
|
2075
|
+
|
|
2076
|
+
if "time" in dim_names and dim_names["time"] not in ds.dims:
|
|
2077
|
+
ds = ds.expand_dims(dim_names["time"])
|
|
2078
|
+
|
|
2079
|
+
return ds
|
|
2080
|
+
|
|
2081
|
+
|
|
2082
|
+
def _check_dataset(
|
|
2083
|
+
ds: xr.Dataset,
|
|
2084
|
+
dim_names: Dict[str, str],
|
|
2085
|
+
var_names: Dict[str, str],
|
|
2086
|
+
opt_var_names: Optional[Dict[str, str]] = None,
|
|
2087
|
+
) -> None:
|
|
2088
|
+
"""Check if the dataset contains the specified variables and dimensions.
|
|
2089
|
+
|
|
2090
|
+
Parameters
|
|
2091
|
+
----------
|
|
2092
|
+
ds : xr.Dataset
|
|
2093
|
+
The xarray Dataset to check.
|
|
2094
|
+
dim_names: Dict[str, str], optional
|
|
2095
|
+
Dictionary specifying the names of dimensions in the dataset.
|
|
2096
|
+
var_names: Dict[str, str]
|
|
2097
|
+
Dictionary of variable names that are required in the dataset.
|
|
2098
|
+
opt_var_names : Optional[Dict[str, str]], optional
|
|
2099
|
+
Dictionary of optional variable names.
|
|
2100
|
+
These variables are not strictly required, and the function will not raise an error if they are missing.
|
|
2101
|
+
Default is None, meaning no optional variables are considered.
|
|
2102
|
+
|
|
2103
|
+
|
|
2104
|
+
Raises
|
|
2105
|
+
------
|
|
2106
|
+
ValueError
|
|
2107
|
+
If the dataset does not contain the specified variables or dimensions.
|
|
2108
|
+
"""
|
|
2109
|
+
missing_dims = [dim for dim in dim_names.values() if dim not in ds.dims]
|
|
2110
|
+
if missing_dims:
|
|
2111
|
+
raise ValueError(
|
|
2112
|
+
f"Dataset does not contain all required dimensions. The following dimensions are missing: {missing_dims}"
|
|
2113
|
+
)
|
|
2114
|
+
|
|
2115
|
+
missing_vars = [var for var in var_names.values() if var not in ds.data_vars]
|
|
2116
|
+
if missing_vars:
|
|
2117
|
+
raise ValueError(
|
|
2118
|
+
f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
|
|
2119
|
+
)
|
|
2120
|
+
|
|
2121
|
+
if opt_var_names:
|
|
2122
|
+
missing_optional_vars = [
|
|
2123
|
+
var for var in opt_var_names.values() if var not in ds.data_vars
|
|
2124
|
+
]
|
|
2125
|
+
if missing_optional_vars:
|
|
2126
|
+
logging.warning(
|
|
2127
|
+
f"Optional variables missing (but not critical): {missing_optional_vars}"
|
|
2128
|
+
)
|
|
2129
|
+
|
|
2130
|
+
|
|
2131
|
+
def _select_relevant_times(
|
|
2132
|
+
ds, time_dim, start_time=None, end_time=None, climatology=False
|
|
2133
|
+
) -> xr.Dataset:
|
|
2134
|
+
"""Select a subset of the dataset based on the specified time range.
|
|
2135
|
+
|
|
2136
|
+
This method filters the dataset to include all records between `start_time` and `end_time`.
|
|
2137
|
+
Additionally, it ensures that one record at or before `start_time` and one record at or
|
|
2138
|
+
after `end_time` are included, even if they fall outside the strict time range.
|
|
2139
|
+
|
|
2140
|
+
If no `end_time` is specified, the method will select the time range of
|
|
2141
|
+
[start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
|
|
2142
|
+
|
|
2143
|
+
Parameters
|
|
2144
|
+
----------
|
|
2145
|
+
ds : xr.Dataset
|
|
2146
|
+
The input dataset to be filtered. Must contain a time dimension.
|
|
2147
|
+
time_dim: str
|
|
2148
|
+
Name of time dimension.
|
|
2149
|
+
start_time : Optional[datetime], optional
|
|
2150
|
+
The start time for selecting relevant data. If not provided, the data is not filtered by start time.
|
|
2151
|
+
end_time : Optional[datetime], optional
|
|
2152
|
+
The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
|
|
2153
|
+
or no filtering is applied if start_time is not provided.
|
|
2154
|
+
climatology : bool
|
|
2155
|
+
Indicates whether the dataset is climatological. Defaults to False.
|
|
2156
|
+
|
|
2157
|
+
Returns
|
|
2158
|
+
-------
|
|
2159
|
+
xr.Dataset
|
|
2160
|
+
A dataset filtered to the specified time range, including the closest entries
|
|
2161
|
+
at or before `start_time` and at or after `end_time` if applicable.
|
|
2162
|
+
|
|
2163
|
+
Raises
|
|
2164
|
+
------
|
|
2165
|
+
ValueError
|
|
2166
|
+
If no matching times are found between `start_time` and `start_time + 24 hours`.
|
|
2167
|
+
|
|
2168
|
+
Warns
|
|
2169
|
+
-----
|
|
2170
|
+
UserWarning
|
|
2171
|
+
If the dataset contains exactly 12 time steps but the climatology flag is not set.
|
|
2172
|
+
This may indicate that the dataset represents climatology data.
|
|
2173
|
+
|
|
2174
|
+
UserWarning
|
|
2175
|
+
If no records at or before `start_time` or no records at or after `end_time` are found.
|
|
2176
|
+
|
|
2177
|
+
UserWarning
|
|
2178
|
+
If the dataset does not contain any time dimension or the time dimension is incorrectly named.
|
|
2179
|
+
|
|
2180
|
+
Notes
|
|
2181
|
+
-----
|
|
2182
|
+
- If the `climatology` flag is set and `end_time` is not provided, the method will
|
|
2183
|
+
interpolate initial conditions from climatology data.
|
|
2184
|
+
- If the dataset uses `cftime` datetime objects, these will be converted to standard
|
|
2185
|
+
`np.datetime64` objects before filtering.
|
|
2186
|
+
"""
|
|
2187
|
+
|
|
2188
|
+
if time_dim in ds.variables:
|
|
2189
|
+
if climatology:
|
|
2190
|
+
if len(ds[time_dim]) != 12:
|
|
2191
|
+
raise ValueError(
|
|
2192
|
+
f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
|
|
2193
|
+
)
|
|
2194
|
+
if not end_time:
|
|
2195
|
+
# Interpolate from climatology for initial conditions
|
|
2196
|
+
ds = interpolate_from_climatology(ds, time_dim, start_time)
|
|
2197
|
+
else:
|
|
2198
|
+
time_type = get_time_type(ds[time_dim])
|
|
2199
|
+
if time_type == "int":
|
|
2200
|
+
raise ValueError(
|
|
2201
|
+
"The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
|
|
2202
|
+
)
|
|
2203
|
+
if time_type == "cftime":
|
|
2204
|
+
ds = ds.assign_coords(
|
|
2205
|
+
{time_dim: convert_cftime_to_datetime(ds[time_dim])}
|
|
2206
|
+
)
|
|
2207
|
+
if end_time:
|
|
2208
|
+
end_time = end_time
|
|
2209
|
+
|
|
2210
|
+
# Identify records before or at start_time
|
|
2211
|
+
before_start = ds[time_dim] <= np.datetime64(start_time)
|
|
2212
|
+
if before_start.any():
|
|
2213
|
+
closest_before_start = (
|
|
2214
|
+
ds[time_dim].where(before_start, drop=True).max()
|
|
2215
|
+
)
|
|
2216
|
+
else:
|
|
2217
|
+
logging.warning("No records found at or before the start_time.")
|
|
2218
|
+
closest_before_start = ds[time_dim].min()
|
|
2219
|
+
|
|
2220
|
+
# Identify records after or at end_time
|
|
2221
|
+
after_end = ds[time_dim] >= np.datetime64(end_time)
|
|
2222
|
+
if after_end.any():
|
|
2223
|
+
closest_after_end = ds[time_dim].where(after_end, drop=True).min()
|
|
2224
|
+
else:
|
|
2225
|
+
logging.warning("No records found at or after the end_time.")
|
|
2226
|
+
closest_after_end = ds[time_dim].max()
|
|
2227
|
+
|
|
2228
|
+
# Select records within the time range and add the closest before/after
|
|
2229
|
+
within_range = (ds[time_dim] > np.datetime64(start_time)) & (
|
|
2230
|
+
ds[time_dim] < np.datetime64(end_time)
|
|
2231
|
+
)
|
|
2232
|
+
selected_times = ds[time_dim].where(
|
|
2233
|
+
within_range
|
|
2234
|
+
| (ds[time_dim] == closest_before_start)
|
|
2235
|
+
| (ds[time_dim] == closest_after_end),
|
|
2236
|
+
drop=True,
|
|
2237
|
+
)
|
|
2238
|
+
ds = ds.sel({time_dim: selected_times})
|
|
2239
|
+
else:
|
|
2240
|
+
# Look in time range [start_time, start_time + 24h]
|
|
2241
|
+
end_time = start_time + timedelta(days=1)
|
|
2242
|
+
times = (np.datetime64(start_time) <= ds[time_dim]) & (
|
|
2243
|
+
ds[time_dim] < np.datetime64(end_time)
|
|
2244
|
+
)
|
|
2245
|
+
if np.all(~times):
|
|
2246
|
+
raise ValueError(
|
|
2247
|
+
f"The dataset does not contain any time entries between the specified start_time: {start_time} "
|
|
2248
|
+
f"and {start_time + timedelta(hours=24)}. "
|
|
2249
|
+
"Please ensure the dataset includes time entries for that range."
|
|
2250
|
+
)
|
|
2251
|
+
|
|
2252
|
+
ds = ds.where(times, drop=True)
|
|
2253
|
+
if ds.sizes[time_dim] > 1:
|
|
2254
|
+
# Pick the time closest to start_time
|
|
2255
|
+
ds = ds.isel({time_dim: 0})
|
|
2256
|
+
logging.info(
|
|
2257
|
+
f"Selected time entry closest to the specified start_time ({start_time}) within the range [{start_time}, {start_time + timedelta(hours=24)}]: {ds[time_dim].values}"
|
|
2258
|
+
)
|
|
2259
|
+
else:
|
|
2260
|
+
logging.warning(
|
|
2261
|
+
"Dataset does not contain any time information. Please check if the time dimension "
|
|
2262
|
+
"is correctly named or if the dataset includes time data."
|
|
2263
|
+
)
|
|
2264
|
+
|
|
2265
|
+
return ds
|
|
2266
|
+
|
|
2267
|
+
|
|
2268
|
+
def decode_string(byte_array):
|
|
2269
|
+
|
|
2270
|
+
# Decode each byte and handle errors with 'ignore'
|
|
2271
|
+
decoded_string = "".join(
|
|
2272
|
+
[
|
|
2273
|
+
x.decode("utf-8", errors="ignore") # Ignore invalid byte sequences
|
|
2274
|
+
for x in byte_array.values
|
|
2275
|
+
if isinstance(x, bytes) and x != b" " and x is not np.nan
|
|
2276
|
+
]
|
|
2277
|
+
)
|
|
2278
|
+
|
|
2279
|
+
return decoded_string
|