roms-tools 1.6.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283)
  1. ci/environment.yml +1 -1
  2. roms_tools/__init__.py +1 -0
  3. roms_tools/_version.py +1 -1
  4. roms_tools/setup/boundary_forcing.py +266 -256
  5. roms_tools/setup/datasets.py +986 -231
  6. roms_tools/setup/download.py +41 -15
  7. roms_tools/setup/grid.py +561 -512
  8. roms_tools/setup/initial_conditions.py +162 -106
  9. roms_tools/setup/mask.py +69 -0
  10. roms_tools/setup/plot.py +81 -23
  11. roms_tools/setup/regrid.py +4 -2
  12. roms_tools/setup/river_forcing.py +589 -0
  13. roms_tools/setup/surface_forcing.py +21 -130
  14. roms_tools/setup/tides.py +15 -79
  15. roms_tools/setup/topography.py +92 -128
  16. roms_tools/setup/utils.py +307 -25
  17. roms_tools/setup/vertical_coordinate.py +5 -16
  18. roms_tools/tests/test_setup/test_boundary_forcing.py +10 -7
  19. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/.zattrs +1 -1
  20. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/.zmetadata +157 -130
  21. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_east/.zattrs +1 -1
  22. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_north/.zattrs +1 -1
  23. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_south/.zattrs +1 -1
  24. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_west/.zattrs +1 -1
  25. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_east/.zattrs +1 -1
  26. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_north/.zattrs +1 -1
  27. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_south/.zattrs +1 -1
  28. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_west/.zattrs +1 -1
  29. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_east/.zattrs +1 -1
  30. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_north/.zattrs +1 -1
  31. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_south/.zattrs +1 -1
  32. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_west/.zattrs +1 -1
  33. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_east/.zattrs +1 -1
  34. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_north/.zattrs +1 -1
  35. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_south/.zattrs +1 -1
  36. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_west/.zattrs +1 -1
  37. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_east/.zattrs +1 -1
  38. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_north/.zattrs +1 -1
  39. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_south/.zattrs +1 -1
  40. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_west/.zattrs +1 -1
  41. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_east/.zattrs +1 -1
  42. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_north/.zattrs +1 -1
  43. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_south/.zattrs +1 -1
  44. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_west/.zattrs +1 -1
  45. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_east/.zattrs +1 -1
  46. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_north/.zattrs +1 -1
  47. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_south/.zattrs +1 -1
  48. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_west/.zattrs +1 -1
  49. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_east/.zattrs +1 -1
  50. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_north/.zattrs +1 -1
  51. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_south/.zattrs +1 -1
  52. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_west/.zattrs +1 -1
  53. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_east/.zattrs +1 -1
  54. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_north/.zattrs +1 -1
  55. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_south/.zattrs +1 -1
  56. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_west/.zattrs +1 -1
  57. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_east/.zattrs +1 -1
  58. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_north/.zattrs +1 -1
  59. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_south/.zattrs +1 -1
  60. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_west/.zattrs +1 -1
  61. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_east/.zattrs +1 -1
  62. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_north/.zattrs +1 -1
  63. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_south/.zattrs +1 -1
  64. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_west/.zattrs +1 -1
  65. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_east/.zattrs +1 -1
  66. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_north/.zattrs +1 -1
  67. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_south/.zattrs +1 -1
  68. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_west/.zattrs +1 -1
  69. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_east/.zattrs +1 -1
  70. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_north/.zattrs +1 -1
  71. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_south/.zattrs +1 -1
  72. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_west/.zattrs +1 -1
  73. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_east/.zattrs +1 -1
  74. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_north/.zattrs +1 -1
  75. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_south/.zattrs +1 -1
  76. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_west/.zattrs +1 -1
  77. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_east/.zattrs +1 -1
  78. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_north/.zattrs +1 -1
  79. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_south/.zattrs +1 -1
  80. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_west/.zattrs +1 -1
  81. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_east/.zattrs +1 -1
  82. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_north/.zattrs +1 -1
  83. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_south/.zattrs +1 -1
  84. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_west/.zattrs +1 -1
  85. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_east/.zattrs +1 -1
  86. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_north/.zattrs +1 -1
  87. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_south/.zattrs +1 -1
  88. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_west/.zattrs +1 -1
  89. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/abs_time/.zattrs +1 -0
  90. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/bry_time/.zattrs +1 -1
  91. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_east/.zattrs +1 -1
  92. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_north/.zattrs +1 -1
  93. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_south/.zattrs +1 -1
  94. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_west/.zattrs +1 -1
  95. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_east/.zattrs +1 -1
  96. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_north/.zattrs +1 -1
  97. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_south/.zattrs +1 -1
  98. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_west/.zattrs +1 -1
  99. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_east/.zattrs +1 -1
  100. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_north/.zattrs +1 -1
  101. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_south/.zattrs +1 -1
  102. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_west/.zattrs +1 -1
  103. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_east/.zattrs +1 -1
  104. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_north/.zattrs +1 -1
  105. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_south/.zattrs +1 -1
  106. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_west/.zattrs +1 -1
  107. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_east/.zattrs +1 -1
  108. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_north/.zattrs +1 -1
  109. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_south/.zattrs +1 -1
  110. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_west/.zattrs +1 -1
  111. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_east/.zattrs +1 -1
  112. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_north/.zattrs +1 -1
  113. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_south/.zattrs +1 -1
  114. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_west/.zattrs +1 -1
  115. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_east/.zattrs +1 -1
  116. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_north/.zattrs +1 -1
  117. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_south/.zattrs +1 -1
  118. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_west/.zattrs +1 -1
  119. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_east/.zattrs +1 -1
  120. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_north/.zattrs +1 -1
  121. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_south/.zattrs +1 -1
  122. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_west/.zattrs +1 -1
  123. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_east/.zattrs +1 -1
  124. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_north/.zattrs +1 -1
  125. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_south/.zattrs +1 -1
  126. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_west/.zattrs +1 -1
  127. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/month/.zarray +20 -0
  128. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/month/.zattrs +6 -0
  129. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/month/0 +0 -0
  130. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_east/.zattrs +1 -1
  131. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_north/.zattrs +1 -1
  132. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_south/.zattrs +1 -1
  133. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_west/.zattrs +1 -1
  134. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_east/.zattrs +1 -1
  135. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_north/.zattrs +1 -1
  136. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_south/.zattrs +1 -1
  137. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_west/.zattrs +1 -1
  138. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_east/.zattrs +1 -1
  139. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_north/.zattrs +1 -1
  140. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_south/.zattrs +1 -1
  141. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_west/.zattrs +1 -1
  142. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_east/.zattrs +1 -1
  143. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_north/.zattrs +1 -1
  144. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_south/.zattrs +1 -1
  145. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_west/.zattrs +1 -1
  146. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_east/.zattrs +1 -1
  147. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_north/.zattrs +1 -1
  148. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_south/.zattrs +1 -1
  149. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_west/.zattrs +1 -1
  150. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_east/.zattrs +1 -1
  151. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_north/.zattrs +1 -1
  152. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_south/.zattrs +1 -1
  153. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_west/.zattrs +1 -1
  154. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/.zattrs +1 -1
  155. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/.zmetadata +39 -12
  156. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/abs_time/.zattrs +1 -0
  157. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/dust/.zattrs +1 -1
  158. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/dust_time/.zattrs +1 -1
  159. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/iron/.zattrs +1 -1
  160. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/iron_time/.zattrs +1 -1
  161. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/month/.zarray +20 -0
  162. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/month/.zattrs +6 -0
  163. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/month/0 +0 -0
  164. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nhy/.zattrs +1 -1
  165. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nhy_time/.zattrs +1 -1
  166. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nox/.zattrs +1 -1
  167. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/nox_time/.zattrs +1 -1
  168. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/pco2_air/.zattrs +1 -1
  169. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/pco2_air_alt/.zattrs +1 -1
  170. roms_tools/tests/test_setup/test_data/bgc_surface_forcing_from_climatology.zarr/pco2_time/.zattrs +1 -1
  171. roms_tools/tests/test_setup/test_data/grid.zarr/.zattrs +0 -1
  172. roms_tools/tests/test_setup/test_data/grid.zarr/.zmetadata +56 -201
  173. roms_tools/tests/test_setup/test_data/grid.zarr/Cs_r/.zattrs +1 -1
  174. roms_tools/tests/test_setup/test_data/grid.zarr/Cs_w/.zattrs +1 -1
  175. roms_tools/tests/test_setup/test_data/grid.zarr/{interface_depth_rho → sigma_r}/.zarray +2 -6
  176. roms_tools/tests/test_setup/test_data/grid.zarr/sigma_r/.zattrs +7 -0
  177. roms_tools/tests/test_setup/test_data/grid.zarr/sigma_r/0 +0 -0
  178. roms_tools/tests/test_setup/test_data/grid.zarr/{interface_depth_u → sigma_w}/.zarray +2 -6
  179. roms_tools/tests/test_setup/test_data/grid.zarr/sigma_w/.zattrs +7 -0
  180. roms_tools/tests/test_setup/test_data/grid.zarr/sigma_w/0 +0 -0
  181. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/.zattrs +1 -2
  182. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/.zmetadata +58 -203
  183. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/Cs_r/.zattrs +1 -1
  184. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/Cs_w/.zattrs +1 -1
  185. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/h/.zattrs +1 -1
  186. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/h/0.0 +0 -0
  187. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_coarse/0.0 +0 -0
  188. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_rho/0.0 +0 -0
  189. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_u/0.0 +0 -0
  190. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_v/0.0 +0 -0
  191. roms_tools/tests/test_setup/test_data/{grid.zarr/interface_depth_v → grid_that_straddles_dateline.zarr/sigma_r}/.zarray +2 -6
  192. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_r/.zattrs +7 -0
  193. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_r/0 +0 -0
  194. roms_tools/tests/test_setup/test_data/{grid.zarr/layer_depth_rho → grid_that_straddles_dateline.zarr/sigma_w}/.zarray +2 -6
  195. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_w/.zattrs +7 -0
  196. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/sigma_w/0 +0 -0
  197. roms_tools/tests/test_setup/test_data/river_forcing.zarr/.zattrs +3 -0
  198. roms_tools/tests/test_setup/test_data/river_forcing.zarr/.zgroup +3 -0
  199. roms_tools/tests/test_setup/test_data/river_forcing.zarr/.zmetadata +214 -0
  200. roms_tools/tests/test_setup/test_data/river_forcing.zarr/abs_time/.zarray +20 -0
  201. roms_tools/tests/test_setup/test_data/river_forcing.zarr/abs_time/.zattrs +8 -0
  202. roms_tools/tests/test_setup/test_data/river_forcing.zarr/abs_time/0 +0 -0
  203. roms_tools/tests/test_setup/test_data/river_forcing.zarr/month/.zarray +20 -0
  204. roms_tools/tests/test_setup/test_data/river_forcing.zarr/month/.zattrs +6 -0
  205. roms_tools/tests/test_setup/test_data/river_forcing.zarr/month/0 +0 -0
  206. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_name/.zarray +24 -0
  207. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_name/.zattrs +6 -0
  208. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_name/0 +0 -0
  209. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_time/.zarray +20 -0
  210. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_time/.zattrs +8 -0
  211. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_time/0 +0 -0
  212. roms_tools/tests/test_setup/test_data/{grid.zarr/layer_depth_v → river_forcing.zarr/river_tracer}/.zarray +4 -4
  213. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_tracer/.zattrs +10 -0
  214. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_tracer/0.0.0 +0 -0
  215. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_volume/.zarray +22 -0
  216. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_volume/.zattrs +9 -0
  217. roms_tools/tests/test_setup/test_data/river_forcing.zarr/river_volume/0.0 +0 -0
  218. roms_tools/tests/test_setup/test_data/{grid.zarr/layer_depth_u → river_forcing.zarr/tracer_name}/.zarray +2 -6
  219. roms_tools/tests/test_setup/test_data/river_forcing.zarr/tracer_name/.zattrs +6 -0
  220. roms_tools/tests/test_setup/test_data/river_forcing.zarr/tracer_name/0 +0 -0
  221. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zattrs +1 -0
  222. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zgroup +3 -0
  223. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zmetadata +185 -0
  224. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/abs_time/.zarray +20 -0
  225. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/abs_time/.zattrs +8 -0
  226. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/abs_time/0 +0 -0
  227. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_name/.zarray +24 -0
  228. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_name/.zattrs +6 -0
  229. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_name/0 +0 -0
  230. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_time/.zarray +20 -0
  231. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_time/.zattrs +7 -0
  232. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_time/0 +0 -0
  233. roms_tools/tests/test_setup/test_data/{grid_that_straddles_dateline.zarr/interface_depth_v → river_forcing_no_climatology.zarr/river_tracer}/.zarray +4 -4
  234. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_tracer/.zattrs +10 -0
  235. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_tracer/0.0.0 +0 -0
  236. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_volume/.zarray +22 -0
  237. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_volume/.zattrs +9 -0
  238. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_volume/0.0 +0 -0
  239. roms_tools/tests/test_setup/test_data/{grid_that_straddles_dateline.zarr/interface_depth_u → river_forcing_no_climatology.zarr/tracer_name}/.zarray +2 -6
  240. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/tracer_name/.zattrs +6 -0
  241. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/tracer_name/0 +0 -0
  242. roms_tools/tests/test_setup/test_grid.py +110 -12
  243. roms_tools/tests/test_setup/test_initial_conditions.py +2 -3
  244. roms_tools/tests/test_setup/test_river_forcing.py +367 -0
  245. roms_tools/tests/test_setup/test_surface_forcing.py +2 -24
  246. roms_tools/tests/test_setup/test_tides.py +2 -3
  247. roms_tools/tests/test_setup/test_topography.py +106 -1
  248. roms_tools/tests/test_setup/test_validation.py +4 -0
  249. roms_tools/utils.py +12 -10
  250. {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/LICENSE +1 -1
  251. {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/METADATA +6 -5
  252. {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/RECORD +254 -225
  253. {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/WHEEL +1 -1
  254. roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_rho/.zattrs +0 -9
  255. roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_rho/0.0.0 +0 -0
  256. roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_u/.zattrs +0 -9
  257. roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_u/0.0.0 +0 -0
  258. roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_v/.zattrs +0 -9
  259. roms_tools/tests/test_setup/test_data/grid.zarr/interface_depth_v/0.0.0 +0 -0
  260. roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_rho/.zattrs +0 -9
  261. roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_rho/0.0.0 +0 -0
  262. roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_u/.zattrs +0 -9
  263. roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_u/0.0.0 +0 -0
  264. roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_v/.zattrs +0 -9
  265. roms_tools/tests/test_setup/test_data/grid.zarr/layer_depth_v/0.0.0 +0 -0
  266. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_rho/.zarray +0 -24
  267. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_rho/.zattrs +0 -9
  268. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_rho/0.0.0 +0 -0
  269. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_u/.zattrs +0 -9
  270. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_u/0.0.0 +0 -0
  271. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_v/.zattrs +0 -9
  272. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/interface_depth_v/0.0.0 +0 -0
  273. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_rho/.zarray +0 -24
  274. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_rho/.zattrs +0 -9
  275. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_rho/0.0.0 +0 -0
  276. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_u/.zarray +0 -24
  277. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_u/.zattrs +0 -9
  278. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_u/0.0.0 +0 -0
  279. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_v/.zarray +0 -24
  280. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_v/.zattrs +0 -9
  281. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/layer_depth_v/0.0.0 +0 -0
  282. roms_tools/tests/test_setup/test_vertical_coordinate.py +0 -91
  283. {roms_tools-1.6.2.dist-info → roms_tools-2.0.0.dist-info}/top_level.txt +0 -0
roms_tools/setup/datasets.py
@@ -1,3 +1,4 @@
+import time
 import re
 import xarray as xr
 from dataclasses import dataclass, field
@@ -13,10 +14,17 @@ from roms_tools.setup.utils import (
     get_time_type,
     convert_cftime_to_datetime,
     one_dim_fill,
+    gc_dist,
+)
+from roms_tools.setup.download import (
+    download_correction_data,
+    download_topo,
+    download_river_data,
 )
-from roms_tools.setup.download import download_correction_data
 from roms_tools.setup.fill import LateralFill
 
+# lat-lon datasets
+
 
 @dataclass(frozen=True, kw_only=True)
 class Dataset:
@@ -32,10 +40,10 @@ class Dataset:
     end_time : Optional[datetime], optional
         The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
         or no filtering is applied if start_time is not provided.
-    var_names: Dict[str, str]
-        Dictionary of variable names that are required in the dataset.
     dim_names: Dict[str, str], optional
         Dictionary specifying the names of dimensions in the dataset.
+    var_names: Dict[str, str]
+        Dictionary of variable names that are required in the dataset.
     climatology : bool
        Indicates whether the dataset is climatological. Defaults to False.
     use_dask: bool
@@ -62,7 +70,6 @@ class Dataset:
     filename: Union[str, Path, List[Union[str, Path]]]
     start_time: Optional[datetime] = None
     end_time: Optional[datetime] = None
-    var_names: Dict[str, str]
     dim_names: Dict[str, str] = field(
         default_factory=lambda: {
             "longitude": "longitude",
@@ -70,8 +77,9 @@ class Dataset:
             "time": "time",
         }
     )
+    var_names: Dict[str, str]
     climatology: Optional[bool] = False
-    use_dask: Optional[bool] = True
+    use_dask: Optional[bool] = False
     apply_post_processing: Optional[bool] = True
 
     is_global: bool = field(init=False, repr=False)
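
The `use_dask` default flips from `True` to `False` in this hunk: 2.0.0 now reads source data eagerly unless dask-backed loading is requested explicitly. A minimal sketch of opting back in through the `Dataset` signature above (the filename and variable mapping are hypothetical):

    from roms_tools.setup.datasets import Dataset

    # Lazy, dask-backed loading is no longer the default and must be requested
    data = Dataset(
        filename="my_source_data.nc",       # hypothetical input file
        var_names={"temp": "temperature"},  # hypothetical variable mapping
        use_dask=True,                      # was the default in 1.6.2
    )
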
@@ -114,6 +122,8 @@ class Dataset:
 
         # Make sure that latitude is ascending
         ds = self.ensure_dimension_is_ascending(ds, dim="latitude")
+        # Make sure there are no 360 degree jumps in longitude
+        ds = self.ensure_dimension_is_ascending(ds, dim="longitude")
 
         if "depth" in self.dim_names:
             # Make sure that depth is ascending
@@ -123,11 +133,6 @@ class Dataset:
 
         # Check whether the data covers the entire globe
         object.__setattr__(self, "is_global", self.check_if_global(ds))
-
-        # If dataset is global concatenate three copies of field along longitude dimension
-        if self.is_global:
-            ds = self.concatenate_longitudes(ds)
-
         object.__setattr__(self, "ds", ds)
 
         if self.apply_post_processing:
@@ -149,101 +154,7 @@ class Dataset:
             If a list of files is provided but self.dim_names["time"] is not available or use_dask=False.
         """
 
-        # Precompile the regex for matching wildcard characters
-        wildcard_regex = re.compile(r"[\*\?\[\]]")
-
-        # Convert Path objects to strings
-        if isinstance(self.filename, (str, Path)):
-            filename_str = str(self.filename)
-        elif isinstance(self.filename, list):
-            filename_str = [str(f) for f in self.filename]
-        else:
-            raise ValueError(
-                "filename must be a string, Path, or a list of strings/Paths."
-            )
-
-        # Handle the case when filename is a string
-        contains_wildcard = False
-        if isinstance(filename_str, str):
-            contains_wildcard = bool(wildcard_regex.search(filename_str))
-            if contains_wildcard:
-                matching_files = glob.glob(filename_str)
-                if not matching_files:
-                    raise FileNotFoundError(
-                        f"No files found matching the pattern '{filename_str}'."
-                    )
-            else:
-                matching_files = [filename_str]
-
-        # Handle the case when filename is a list
-        elif isinstance(filename_str, list):
-            contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
-            if contains_wildcard:
-                matching_files = []
-                for f in filename_str:
-                    files = glob.glob(f)
-                    if not files:
-                        raise FileNotFoundError(
-                            f"No files found matching the pattern '{f}'."
-                        )
-                    matching_files.extend(files)
-            else:
-                matching_files = filename_str
-
-        # Check if time dimension is available when multiple files are provided
-        if isinstance(filename_str, list) and "time" not in self.dim_names:
-            raise ValueError(
-                "A list of files is provided, but time dimension is not available. "
-                "A time dimension must be available to concatenate the files."
-            )
-
-        # Determine the kwargs for combining datasets
-        if contains_wildcard or len(matching_files) == 1:
-            # If there is a wildcard or just one file, use by_coords
-            kwargs = {"combine": "by_coords"}
-        else:
-            # Otherwise, use nested combine based on time
-            kwargs = {"combine": "nested", "concat_dim": self.dim_names["time"]}
-
-        # Base kwargs used for dataset combination
-        combine_kwargs = {
-            "coords": "minimal",
-            "compat": "override",
-            "combine_attrs": "override",
-        }
-
-        if self.use_dask:
-
-            chunks = {
-                self.dim_names["latitude"]: -1,
-                self.dim_names["longitude"]: -1,
-            }
-            if "depth" in self.dim_names:
-                chunks[self.dim_names["depth"]] = -1
-            if "time" in self.dim_names:
-                chunks[self.dim_names["time"]] = 1
-
-            ds = xr.open_mfdataset(
-                matching_files,
-                chunks=chunks,
-                **combine_kwargs,
-                **kwargs,
-            )
-        else:
-            ds_list = []
-            for file in matching_files:
-                ds = xr.open_dataset(file, chunks=None)
-                ds_list.append(ds)
-
-            if kwargs["combine"] == "by_coords":
-                ds = xr.combine_by_coords(ds_list, **combine_kwargs)
-            elif kwargs["combine"] == "nested":
-                ds = xr.combine_nested(
-                    ds_list, concat_dim=kwargs["concat_dim"], **combine_kwargs
-                )
-
-        if "time" in self.dim_names and self.dim_names["time"] not in ds.dims:
-            ds = ds.expand_dims(self.dim_names["time"])
+        ds = _load_data(self.filename, self.dim_names, self.use_dask)
 
         return ds
 
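
The wildcard expansion, file combination, and chunking logic removed above moves into a module-level `_load_data` helper (reused by `RiverDataset` later in this file with `use_dask=False` and `decode_times=False`). Per the removed code, the dask path reduces to a single `open_mfdataset` call; a condensed sketch of that core, not the helper itself (its body is outside this diff):

    import xarray as xr

    def open_lazily(files, dim_names):
        # -1 keeps each spatial dimension in one chunk; time gets one record per chunk
        chunks = {dim_names["latitude"]: -1, dim_names["longitude"]: -1}
        if "time" in dim_names:
            chunks[dim_names["time"]] = 1
        return xr.open_mfdataset(
            files,
            chunks=chunks,
            combine="by_coords",
            coords="minimal",
            compat="override",
            combine_attrs="override",
        )
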
@@ -278,19 +189,8 @@ class Dataset:
         ValueError
             If the dataset does not contain the specified variables or dimensions.
         """
-        missing_vars = [
-            var for var in self.var_names.values() if var not in ds.data_vars
-        ]
-        if missing_vars:
-            raise ValueError(
-                f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
-            )
 
-        missing_dims = [dim for dim in self.dim_names.values() if dim not in ds.dims]
-        if missing_dims:
-            raise ValueError(
-                f"Dataset does not contain all required dimensions. The following dimensions are missing: {missing_vars}"
-            )
+        _check_dataset(ds, self.dim_names, self.var_names)
 
     def select_relevant_fields(self, ds) -> xr.Dataset:
         """Selects and returns a subset of the dataset containing only the variables
@@ -379,86 +279,10 @@ class Dataset:
         """
 
         time_dim = self.dim_names["time"]
-        if time_dim in ds.variables:
-            if self.climatology:
-                if len(ds[time_dim]) != 12:
-                    raise ValueError(
-                        f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
-                    )
-                if not self.end_time:
-                    # Interpolate from climatology for initial conditions
-                    ds = interpolate_from_climatology(
-                        ds, self.dim_names["time"], self.start_time
-                    )
-            else:
-                time_type = get_time_type(ds[time_dim])
-                if time_type == "int":
-                    raise ValueError(
-                        "The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
-                    )
-                if time_type == "cftime":
-                    ds = ds.assign_coords(
-                        {time_dim: convert_cftime_to_datetime(ds[time_dim])}
-                    )
-                if self.end_time:
-                    end_time = self.end_time
-
-                    # Identify records before or at start_time
-                    before_start = ds[time_dim] <= np.datetime64(self.start_time)
-                    if before_start.any():
-                        closest_before_start = (
-                            ds[time_dim].where(before_start, drop=True).max()
-                        )
-                    else:
-                        logging.warning("No records found at or before the start_time.")
-                        closest_before_start = ds[time_dim].min()
-
-                    # Identify records after or at end_time
-                    after_end = ds[time_dim] >= np.datetime64(end_time)
-                    if after_end.any():
-                        closest_after_end = (
-                            ds[time_dim].where(after_end, drop=True).min()
-                        )
-                    else:
-                        logging.warning("No records found at or after the end_time.")
-                        closest_after_end = ds[time_dim].max()
-
-                    # Select records within the time range and add the closest before/after
-                    within_range = (ds[time_dim] > np.datetime64(self.start_time)) & (
-                        ds[time_dim] < np.datetime64(end_time)
-                    )
-                    selected_times = ds[time_dim].where(
-                        within_range
-                        | (ds[time_dim] == closest_before_start)
-                        | (ds[time_dim] == closest_after_end),
-                        drop=True,
-                    )
-                    ds = ds.sel({time_dim: selected_times})
-                else:
-                    # Look in time range [self.start_time, self.start_time + 24h]
-                    end_time = self.start_time + timedelta(days=1)
-                    times = (np.datetime64(self.start_time) <= ds[time_dim]) & (
-                        ds[time_dim] < np.datetime64(end_time)
-                    )
-                    if np.all(~times):
-                        raise ValueError(
-                            f"The dataset does not contain any time entries between the specified start_time: {self.start_time} "
-                            f"and {self.start_time + timedelta(hours=24)}. "
-                            "Please ensure the dataset includes time entries for that range."
-                        )
-
-                    ds = ds.where(times, drop=True)
-                    if ds.sizes[time_dim] > 1:
-                        # Pick the time closest to self.start_time
-                        ds = ds.isel({time_dim: 0})
-                    logging.info(
-                        f"Selected time entry closest to the specified start_time ({self.start_time}) within the range [{self.start_time}, {self.start_time + timedelta(hours=24)}]: {ds[time_dim].values}"
-                    )
-        else:
-            logging.warning(
-                "Dataset does not contain any time information. Please check if the time dimension "
-                "is correctly named or if the dataset includes time data."
-            )
+
+        ds = _select_relevant_times(
+            ds, time_dim, self.start_time, self.end_time, self.climatology
+        )
 
         return ds
 
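
The inlined time filtering above likewise moves into a shared `_select_relevant_times(ds, time_dim, start_time, end_time, climatology)` helper. The rule it has to preserve, per the removed code: keep every record strictly inside (start_time, end_time), plus the closest record at or before start_time and the closest record at or after end_time. The same rule in a standalone numpy sketch:

    import numpy as np

    times = np.array(["2012-01-01", "2012-01-15", "2012-02-01", "2012-02-15"],
                     dtype="datetime64[D]")
    start, end = np.datetime64("2012-01-10"), np.datetime64("2012-02-05")

    before = times[times <= start].max()  # closest record at or before start
    after = times[times >= end].min()     # closest record at or after end
    keep = ((times > start) & (times < end)) | (times == before) | (times == after)
    print(times[keep])  # all four stamps survive: the window plus its bracketing records
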
@@ -467,7 +291,11 @@
     ) -> xr.Dataset:
         """Ensure that the specified dimension in the dataset is in ascending order.
 
-        If the values along the specified dimension are in descending order, this function reverses the order of the dimension to make it ascending.
+        This function checks the order of values along the specified dimension. If they
+        are in descending order, it reverses the dimension to make it ascending. For
+        the "longitude" dimension, if it has a discontinuity (e.g., [0, 180][-180, 0]),
+        the function adjusts values to eliminate the 360-degree jump, transforming
+        the range into a continuous [0, 360) span.
 
         Parameters
         ----------
@@ -481,14 +309,23 @@
         -------
         xr.Dataset
             A new `xarray.Dataset` with the specified dimension in ascending order.
-            If the dimension was already in ascending order, the original dataset is returned unchanged.
-            If the dimension was in descending order, the dataset is returned with the dimension reversed.
+            - If the dimension was already in ascending order, the original dataset is returned unchanged.
+            - If the dimension was in descending order, the dataset is returned with the dimension reversed.
+            - If the dimension is "longitude" with a discontinuity (e.g., [0, 180][-180, 0]), the values are adjusted to eliminate the 360-degree jump.
         """
-        # Make sure that latitude is ascending
+        # Check if the dimension is in descending order and reverse if needed
         diff = np.diff(ds[self.dim_names[dim]])
         if np.all(diff < 0):
             ds = ds.isel(**{self.dim_names[dim]: slice(None, None, -1)})
 
+        # Check for a discontinuity in longitude and adjust values if present
+        elif np.any(diff < 0) and dim == "longitude":
+            ds[self.dim_names[dim]] = xr.where(
+                ds[self.dim_names[dim]] < 0,
+                ds[self.dim_names[dim]] + 360,
+                ds[self.dim_names[dim]],
+            )
+
         return ds
 
     def infer_horizontal_resolution(self, ds: xr.Dataset):
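
The new `elif` branch targets sources whose longitude axis is stored as [0, 180] followed by [-180, 0): adding 360 to the negative values produces a monotone [0, 360) axis without reordering any data. The same `xr.where` transform on a toy coordinate:

    import numpy as np
    import xarray as xr

    lon = xr.DataArray(np.array([0.0, 90.0, 179.0, -179.0, -89.0]), dims="lon")
    fixed = xr.where(lon < 0, lon + 360, lon)
    print(fixed.values)  # [  0.  90. 179. 181. 271.] -- ascending, no 360-degree jump
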
@@ -542,43 +379,68 @@ class Dataset:
 
         return is_global
 
-    def concatenate_longitudes(self, ds):
-        """
-        Concatenates the field three times: with longitudes shifted by -360, original longitudes, and shifted by +360.
+    def concatenate_longitudes(self, ds, end="upper", verbose=False):
+        """Concatenates fields in dataset twice along the longitude dimension.
 
         Parameters
         ----------
-        field : xr.DataArray
-            The field to be concatenated.
+        ds: xr.Dataset
+            The dataset to be concatenated. The longitude dimension must be present in this dataset.
+        end : str, optional
+            Specifies which end to shift the longitudes.
+            Options are:
+            - "lower": shifts longitudes by -360 degrees and concatenates to the lower end.
+            - "upper": shifts longitudes by +360 degrees and concatenates to the upper end.
+            - "both": shifts longitudes by -360 degrees and 360 degrees and concatenates to both ends.
+            Default is "upper".
+        verbose : bool, optional
+            If True, print message if dataset is concatenated along longitude dimension.
+            Defaults to False.
 
         Returns
         -------
-        xr.DataArray
-            The concatenated field, with the longitude dimension extended.
+        ds_concatenated : xr.Dataset
+            The concatenated dataset.
+        """
 
-        Notes
-        -----
-        Concatenating three times may be overkill in most situations, but it is safe. Alternatively, we could refactor
-        to figure out whether concatenating on the lower end, upper end, or at all is needed.
+        if verbose:
+            start_time = time.time()
 
-        """
         ds_concatenated = xr.Dataset()
 
         lon = ds[self.dim_names["longitude"]]
-        lon_minus360 = lon - 360
-        lon_plus360 = lon + 360
-        lon_concatenated = xr.concat(
-            [lon_minus360, lon, lon_plus360], dim=self.dim_names["longitude"]
-        )
+        if end == "lower":
+            lon_minus360 = lon - 360
+            lon_concatenated = xr.concat(
+                [lon_minus360, lon], dim=self.dim_names["longitude"]
+            )
 
-        ds_concatenated[self.dim_names["longitude"]] = lon_concatenated
+        elif end == "upper":
+            lon_plus360 = lon + 360
+            lon_concatenated = xr.concat(
+                [lon, lon_plus360], dim=self.dim_names["longitude"]
+            )
+
+        elif end == "both":
+            lon_minus360 = lon - 360
+            lon_plus360 = lon + 360
+            lon_concatenated = xr.concat(
+                [lon_minus360, lon, lon_plus360], dim=self.dim_names["longitude"]
+            )
 
-        for var in self.var_names.values():
+        for var in ds.data_vars:
             if self.dim_names["longitude"] in ds[var].dims:
                 field = ds[var]
-                field_concatenated = xr.concat(
-                    [field, field, field], dim=self.dim_names["longitude"]
-                )
+
+                if end == "both":
+                    field_concatenated = xr.concat(
+                        [field, field, field], dim=self.dim_names["longitude"]
+                    )
+                else:
+                    field_concatenated = xr.concat(
+                        [field, field], dim=self.dim_names["longitude"]
+                    )
+
                 if self.use_dask:
                     field_concatenated = field_concatenated.chunk(
                         {self.dim_names["longitude"]: -1}
@@ -588,6 +450,13 @@ class Dataset:
             else:
                 ds_concatenated[var] = ds[var]
 
+        ds_concatenated[self.dim_names["longitude"]] = lon_concatenated
+
+        if verbose:
+            logging.info(
+                f"Concatenating the data along the longitude dimension: {time.time() - start_time:.3f} seconds"
+            )
+
         return ds_concatenated
 
     def post_process(self):
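
Where 1.6.2 always concatenated three copies of a global field, the rewritten method extends the data only toward the end that is actually needed. The "upper" case on a toy global axis:

    import numpy as np
    import xarray as xr

    lon = xr.DataArray(np.array([0.0, 90.0, 180.0, 270.0]), dims="lon")
    extended = xr.concat([lon, lon + 360], dim="lon")
    print(extended.values)  # [  0.  90. 180. 270. 360. 450. 540. 630.]

Each data variable is concatenated with a copy of itself along the same dimension, so a target window such as [300, 380] degrees can then be sliced without wrapping.
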
@@ -601,7 +470,9 @@
         """
         pass
 
-    def choose_subdomain(self, target_coords, buffer_points=20, return_copy=False):
+    def choose_subdomain(
+        self, target_coords, buffer_points=20, return_copy=False, verbose=False
+    ):
         """Selects a subdomain from the xarray Dataset based on specified target
         coordinates, extending the selection by a defined buffer. Adjusts longitude
         ranges as necessary to accommodate the dataset's expected range and handles
@@ -618,6 +489,9 @@
         return_subdomain : bool, optional
             If True, returns the subset of the original dataset representing the chosen
             subdomain. If False, assigns the subset to `self.ds`. Defaults to False.
+        verbose : bool, optional
+            If True, print message if dataset is concatenated along longitude dimension.
+            Defaults to False.
 
         Returns
         -------
@@ -640,9 +514,43 @@
 
         margin = self.resolution * buffer_points
 
-        if not self.is_global:
+        # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is necessary)
+        subdomain = self.ds.sel(
+            **{
+                self.dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
+            }
+        )
+        lon = subdomain[self.dim_names["longitude"]]
+
+        if self.is_global:
+            # Concatenate only if necessary
+            if lon_max + margin > lon.max():
+                # See if shifting by +360 degrees helps
+                if (lon_min - margin > (lon + 360).min()) and (
+                    lon_max + margin < (lon + 360).max()
+                ):
+                    subdomain[self.dim_names["longitude"]] = lon + 360
+                    lon = subdomain[self.dim_names["longitude"]]
+                else:
+                    subdomain = self.concatenate_longitudes(
+                        subdomain, end="upper", verbose=verbose
+                    )
+                    lon = subdomain[self.dim_names["longitude"]]
+            if lon_min - margin < lon.min():
+                # See if shifting by -360 degrees helps
+                if (lon_min - margin > (lon - 360).min()) and (
+                    lon_max + margin < (lon - 360).max()
+                ):
+                    subdomain[self.dim_names["longitude"]] = lon - 360
+                    lon = subdomain[self.dim_names["longitude"]]
+                else:
+                    subdomain = self.concatenate_longitudes(
+                        subdomain, end="lower", verbose=verbose
+                    )
+                    lon = subdomain[self.dim_names["longitude"]]
+
+        else:
             # Adjust longitude range if needed to match the expected range
-            lon = self.ds[self.dim_names["longitude"]]
             if not target_coords["straddle"]:
                 if lon.min() < -180:
                     if lon_max + margin > 0:
@@ -662,12 +570,9 @@
             if lon_min - margin < 0:
                 lon_min += 360
                 lon_max += 360
-
-        # Select the subdomain
-
-        subdomain = self.ds.sel(
+        # Select the subdomain in longitude direction
+        subdomain = subdomain.sel(
             **{
-                self.dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
                 self.dim_names["longitude"]: slice(lon_min - margin, lon_max + margin),
             }
         )
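
`choose_subdomain` now slices latitude first and, for global datasets, tries a uniform ±360-degree shift of the longitude axis before falling back to `concatenate_longitudes`. The shift test in isolation:

    import numpy as np

    lon = np.arange(0.0, 360.0, 1.0)               # dataset longitude axis
    lon_min, lon_max, margin = -150.0, -20.0, 5.0  # target window on a straddling grid

    shifted = lon - 360                            # candidate -360 degree shift
    if (lon_min - margin > shifted.min()) and (lon_max + margin < shifted.max()):
        lon = shifted                              # shift suffices; no concatenation needed
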
@@ -1522,3 +1427,853 @@ class ERA5Correction(Dataset):
                 "The correction dataset does not contain all specified longitude values."
             )
         object.__setattr__(self, "ds", subdomain)
+
+
+@dataclass(frozen=True, kw_only=True)
+class ETOPO5Dataset(Dataset):
+    """Represents topography data on the original grid from the ETOPO5 dataset.
+
+    Parameters
+    ----------
+    filename : str, optional
+        The path to the ETOPO5 dataset file. If not provided, the dataset will be downloaded
+        automatically via the `pooch` library.
+    var_names : Dict[str, str], optional
+        Dictionary of variable names required in the dataset. Defaults to:
+        {
+            "topo": "topo",
+        }
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset. Defaults to:
+        {"longitude": "lon", "latitude": "lat"}.
+
+    Attributes
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset containing the ETOPO5 data, loaded from the specified file.
+    """
+
+    filename: str = field(default_factory=lambda: download_topo("etopo5.nc"))
+    var_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "topo": "topo",
+        }
+    )
+    dim_names: Dict[str, str] = field(
+        default_factory=lambda: {"longitude": "lon", "latitude": "lat"}
+    )
+    ds: xr.Dataset = field(init=False, repr=False)
+
+    def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
+        """Assign lat and lon as coordinates.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The input dataset.
+
+        Returns
+        -------
+        ds : xr.Dataset
+            A cleaned `xarray.Dataset` with updated coordinates.
+        """
+        ds = ds.assign_coords(
+            {
+                "lon": ds["topo_lon"],
+                "lat": ds["topo_lat"],
+            }
+        )
+        return ds
+
+
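
With `ETOPO5Dataset` defined here, topography sources become `Dataset` subclasses instead of ad-hoc loaders in `topography.py` (consistent with the 128 lines removed there). A sketch of the intended default-download path; the printed dimension order is an assumption about the file layout:

    from roms_tools.setup.datasets import ETOPO5Dataset

    # With no filename given, etopo5.nc is fetched and cached via pooch (download_topo)
    topo = ETOPO5Dataset()
    print(topo.ds["topo"].dims)  # e.g. ("lat", "lon") once clean_up assigns coordinates
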
+@dataclass(frozen=True, kw_only=True)
+class SRTM15Dataset(Dataset):
+    """Represents topography data on the original grid from the SRTM15 dataset.
+
+    Parameters
+    ----------
+    filename : str
+        The path to the SRTM15 dataset file.
+    var_names : Dict[str, str], optional
+        Dictionary of variable names required in the dataset. Defaults to:
+        {
+            "topo": "z",
+        }
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset. Defaults to:
+        {"longitude": "lon", "latitude": "lat"}.
+
+    Attributes
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset containing the SRTM15 data, loaded from the specified file.
+    """
+
+    filename: str
+    var_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "topo": "z",
+        }
+    )
+    dim_names: Dict[str, str] = field(
+        default_factory=lambda: {"longitude": "lon", "latitude": "lat"}
+    )
+    ds: xr.Dataset = field(init=False, repr=False)
+
+
1524
+ # river datasets
1525
+ @dataclass(frozen=True, kw_only=True)
1526
+ class RiverDataset:
1527
+ """Represents river data.
1528
+
1529
+ Parameters
1530
+ ----------
1531
+ filename : Union[str, Path, List[Union[str, Path]]]
1532
+ The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
1533
+ or a list of strings or Path objects containing multiple files.
1534
+ start_time : datetime
1535
+ The start time for selecting relevant data.
1536
+ end_time : datetime
1537
+ The end time for selecting relevant data.
1538
+ dim_names: Dict[str, str]
1539
+ Dictionary specifying the names of dimensions in the dataset.
1540
+ Requires "station" and "time" as keys.
1541
+ var_names: Dict[str, str]
1542
+ Dictionary of variable names that are required in the dataset.
1543
+ Requires the keys "latitude", "longitude", "flux", "ratio", and "name".
1544
+ opt_var_names: Dict[str, str], optional
1545
+ Dictionary of variable names that are optional in the dataset.
1546
+ Defaults to an empty dictionary.
1547
+ climatology : bool
1548
+ Indicates whether the dataset is climatological. Defaults to False.
1549
+
1550
+ Attributes
1551
+ ----------
1552
+ ds : xr.Dataset
1553
+ The xarray Dataset containing the forcing data on its original grid.
1554
+ """
1555
+
1556
+ filename: Union[str, Path, List[Union[str, Path]]]
1557
+ start_time: datetime
1558
+ end_time: datetime
1559
+ dim_names: Dict[str, str]
1560
+ var_names: Dict[str, str]
1561
+ opt_var_names: Optional[Dict[str, str]] = field(default_factory=dict)
1562
+ climatology: Optional[bool] = False
1563
+ ds: xr.Dataset = field(init=False, repr=False)
1564
+
1565
+ def __post_init__(self):
1566
+
1567
+ # Validate start_time and end_time
1568
+ if not isinstance(self.start_time, datetime):
1569
+ raise TypeError(
1570
+ f"start_time must be a datetime object, but got {type(self.start_time).__name__}."
1571
+ )
1572
+ if not isinstance(self.end_time, datetime):
1573
+ raise TypeError(
1574
+ f"end_time must be a datetime object, but got {type(self.end_time).__name__}."
1575
+ )
1576
+
1577
+ ds = self.load_data()
1578
+ ds = self.clean_up(ds)
1579
+ self.check_dataset(ds)
1580
+
1581
+ # Select relevant times
1582
+ ds = self.add_time_info(ds)
1583
+ object.__setattr__(self, "ds", ds)
1584
+
1585
+ def load_data(self) -> xr.Dataset:
1586
+ """Load dataset from the specified file.
1587
+
1588
+ Returns
1589
+ -------
1590
+ ds : xr.Dataset
1591
+ The loaded xarray Dataset containing the forcing data.
1592
+ """
1593
+ ds = _load_data(
1594
+ self.filename, self.dim_names, use_dask=False, decode_times=False
1595
+ )
1596
+
1597
+ return ds
1598
+
1599
+ def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
1600
+ """Decodes the 'name' variable (if byte-encoded) and updates the dataset.
1601
+
1602
+ This method checks if the 'name' variable is of dtype 'object' (i.e., byte-encoded),
1603
+ and if so, decodes each byte array to a string and updates the dataset.
1604
+ It also ensures that the 'station' dimension is of integer type.
1605
+
1606
+
1607
+ Parameters
1608
+ ----------
1609
+ ds : xr.Dataset
1610
+ The dataset containing the 'name' variable to decode.
1611
+
1612
+ Returns
1613
+ -------
1614
+ ds : xr.Dataset
1615
+ The dataset with the decoded 'name' variable.
1616
+ """
1617
+
1618
+ if ds[self.var_names["name"]].dtype == "object":
1619
+ names = []
1620
+ for i in range(len(ds[self.dim_names["station"]])):
1621
+ byte_array = ds[self.var_names["name"]].isel(
1622
+ **{self.dim_names["station"]: i}
1623
+ )
1624
+ name = decode_string(byte_array)
1625
+ names.append(name)
1626
+ ds[self.var_names["name"]] = xr.DataArray(
1627
+ data=names, dims=self.dim_names["station"]
1628
+ )
1629
+
1630
+ if ds[self.dim_names["station"]].dtype == "float64":
1631
+ ds[self.dim_names["station"]] = ds[self.dim_names["station"]].astype(int)
1632
+
1633
+ # Drop all variables that have chars dim
1634
+ vars_to_drop = ["ocn_name", "stn_name", "ct_name", "cn_name", "chars"]
1635
+ existing_vars = [var for var in vars_to_drop if var in ds]
1636
+ ds = ds.drop_vars(existing_vars)
1637
+
1638
+ return ds
1639
+
1640
+ def check_dataset(self, ds: xr.Dataset) -> None:
1641
+ """Check if the dataset contains the specified variables and dimensions.
1642
+
1643
+ Parameters
1644
+ ----------
1645
+ ds : xr.Dataset
1646
+ The xarray Dataset to check.
1647
+
1648
+ Raises
1649
+ ------
1650
+ ValueError
1651
+ If the dataset does not contain the specified variables or dimensions.
1652
+ """
1653
+
1654
+ _check_dataset(ds, self.dim_names, self.var_names, self.opt_var_names)
1655
+
1656
+ def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
1657
+ """Dummy method to be overridden by child classes to add time information to the
1658
+ dataset.
1659
+
1660
+ This method is intended as a placeholder and should be implemented in subclasses
1661
+ to provide specific functionality for adding time-related information to the dataset.
1662
+
1663
+ Parameters
1664
+ ----------
1665
+ ds : xr.Dataset
1666
+ The xarray Dataset to which time information will be added.
1667
+
1668
+ Returns
1669
+ -------
1670
+ xr.Dataset
1671
+ The xarray Dataset with time information added (as implemented by child classes).
1672
+ """
1673
+ return ds
1674
+
1675
+ def select_relevant_times(self, ds) -> xr.Dataset:
1676
+ """Select a subset of the dataset based on the specified time range.
1677
+
1678
+ This method filters the dataset to include all records between `start_time` and `end_time`.
1679
+ Additionally, it ensures that one record at or before `start_time` and one record at or
1680
+ after `end_time` are included, even if they fall outside the strict time range.
1681
+
1682
+ If no `end_time` is specified, the method will select the time range of
1683
+ [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
1684
+
1685
+ Parameters
1686
+ ----------
1687
+ ds : xr.Dataset
1688
+ The input dataset to be filtered. Must contain a time dimension.
1689
+
1690
+ Returns
1691
+ -------
1692
+ xr.Dataset
1693
+ A dataset filtered to the specified time range, including the closest entries
1694
+ at or before `start_time` and at or after `end_time` if applicable.
1695
+
1696
+ Warns
1697
+ -----
1698
+ UserWarning
1699
+ If no records at or before `start_time` or no records at or after `end_time` are found.
1700
+
1701
+ UserWarning
1702
+ If the dataset does not contain any time dimension or the time dimension is incorrectly named.
1703
+ """
1704
+
1705
+ time_dim = self.dim_names["time"]
1706
+
1707
+ ds = _select_relevant_times(ds, time_dim, self.start_time, self.end_time, False)
1708
+
1709
+ return ds
1710
+
1711
+    def compute_climatology(self):
+        """Compute a monthly climatology of the river flux and update the dataset in place."""
+        logging.info("Compute climatology for river forcing.")
+
+        time_dim = self.dim_names["time"]
+
+        flux = self.ds[self.var_names["flux"]].groupby(f"{time_dim}.month").mean()
+        self.ds[self.var_names["flux"]] = flux
+
+        ds = assign_dates_to_climatology(self.ds, "month")
+        ds = ds.swap_dims({"month": "time"})
+        object.__setattr__(self, "ds", ds)
+
+        updated_dim_names = {**self.dim_names}
+        updated_dim_names["time"] = "time"
+        object.__setattr__(self, "dim_names", updated_dim_names)
+
+        object.__setattr__(self, "climatology", True)
+
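The heart of this step is xarray's groupby-over-month reduction. A minimal, self-contained sketch with arbitrary toy data (`FLOW` matches the Dai flux name below; the date reassignment done by `assign_dates_to_climatology` is omitted):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Two years of synthetic daily flux for a single station
    time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
    flux = xr.DataArray(
        np.random.rand(time.size, 1),
        coords={"time": time},
        dims=["time", "station"],
        name="FLOW",
    )

    # Average all Januaries together, all Februaries together, ...
    clim = flux.groupby("time.month").mean("time")
    print(clim.sizes["month"], clim.sizes["station"])  # 12 1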
+    def sort_by_river_volume(self, ds: xr.Dataset) -> xr.Dataset:
+        """Sorts the dataset by river volume in descending order (largest rivers
+        first), if the volume variable is available.
+
+        This method uses the river volume to reorder the dataset so that the rivers
+        with the largest volumes come first along the `station` dimension. If the
+        volume variable is not present in the dataset, a warning is logged.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The xarray Dataset containing the river data to be sorted by volume.
+
+        Returns
+        -------
+        xr.Dataset
+            The dataset with rivers sorted by their volume in descending order.
+            If the volume variable is not available, the original dataset is returned.
+        """
+        if "vol" in self.opt_var_names:
+            volume_values = ds[self.opt_var_names["vol"]].values
+            if isinstance(volume_values, np.ndarray):
+                if np.all(volume_values == volume_values[0]):
+                    # All volumes are identical; an ascending argsort preserves
+                    # the original station order
+                    sorted_indices = np.argsort(volume_values)
+                else:
+                    # Reverse the ascending argsort to obtain descending order
+                    sorted_indices = np.argsort(volume_values)[::-1]
+
+                ds = ds.isel(**{self.dim_names["station"]: sorted_indices})
+            else:
+                logging.warning("The volume data is not in a valid array format.")
+        else:
+            logging.warning(
+                "Cannot sort rivers by volume. 'vol' is missing in the variable names."
+            )
+
+        return ds
+
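Under the default `opt_var_names`, the reordering reduces to a reversed `argsort` over the station volumes. A toy illustration (hypothetical names and volumes):

    import numpy as np
    import xarray as xr

    vol = np.array([3.0, 10.0, 1.0])
    ds = xr.Dataset(
        {"vol_stn": ("station", vol), "riv_name": ("station", ["b", "a", "c"])}
    )

    # Reverse the ascending argsort so the largest-volume river comes first
    order = np.argsort(vol)[::-1]
    print(ds.isel(station=order)["riv_name"].values)  # ['a' 'b' 'c']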
+    def extract_relevant_rivers(self, target_coords, dx):
+        """Extracts a subset of the dataset based on the proximity of river mouths to
+        target coordinates.
+
+        This method calculates the distance between each river mouth and the provided target coordinates
+        (latitude and longitude) using the `gc_dist` function. It then filters the dataset to include only those
+        river stations whose minimum distance from the target is less than a specified threshold distance (`dx`).
+
+        Parameters
+        ----------
+        target_coords : dict
+            A dictionary containing the target coordinates for the comparison. It should include:
+            - "lon" (float): The target longitude in degrees.
+            - "lat" (float): The target latitude in degrees.
+            - "straddle" (bool): A flag indicating whether to adjust the longitudes for stations that cross the
+              International Date Line. If `True`, longitudes greater than 180 degrees are adjusted by subtracting 360;
+              otherwise, negative longitudes are adjusted by adding 360.
+
+        dx : float
+            The maximum distance threshold (in meters) for including a river station. Only river mouths that are
+            within `dx` meters of the target coordinates will be included in the returned dataset.
+
+        Returns
+        -------
+        indices : dict
+            A dictionary containing the indices of the rivers that are within the threshold distance from
+            the target coordinates. The dictionary keys are:
+            - "station" : numpy.ndarray
+                The indices of the rivers that satisfy the distance threshold.
+            - "eta_rho" : numpy.ndarray
+                The indices of the `eta_rho` dimension corresponding to the selected stations.
+            - "xi_rho" : numpy.ndarray
+                The indices of the `xi_rho` dimension corresponding to the selected stations.
+            - "name" : numpy.ndarray
+                The names of the selected rivers.
+        """
+        # Retrieve longitude and latitude of river mouths
+        river_lon = self.ds[self.var_names["longitude"]]
+        river_lat = self.ds[self.var_names["latitude"]]
+
+        # Adjust longitude based on whether the domain crosses the International Date Line (straddle case)
+        if target_coords["straddle"]:
+            river_lon = xr.where(river_lon > 180, river_lon - 360, river_lon)
+        else:
+            river_lon = xr.where(river_lon < 0, river_lon + 360, river_lon)
+
+        # Calculate the distance between the target coordinates and each river mouth
+        dist = gc_dist(target_coords["lon"], target_coords["lat"], river_lon, river_lat)
+        dist_min = dist.min(dim=["eta_rho", "xi_rho"])
+
+        # Filter the dataset to include only stations within the distance threshold
+        if (dist_min < dx).any():
+            ds = self.ds.where(dist_min < dx, drop=True)
+            ds = self.sort_by_river_volume(ds)
+            dist = dist.where(dist_min < dx, drop=True).transpose(
+                self.dim_names["station"], "eta_rho", "xi_rho"
+            )
+            dist_min = dist_min.where(dist_min < dx, drop=True)
+
+            # Find the indices of the closest grid cell to each river mouth
+            indices = np.where(dist == dist_min)
+            names = (
+                self.ds[self.var_names["name"]]
+                .isel({self.dim_names["station"]: indices[0]})
+                .values
+            )
+            # Return the indices in a dictionary format
+            indices = {
+                "station": indices[0],
+                "eta_rho": indices[1],
+                "xi_rho": indices[2],
+                "name": names,
+            }
+        else:
+            ds = xr.Dataset()
+            indices = {
+                "station": [],
+                "eta_rho": [],
+                "xi_rho": [],
+                "name": [],
+            }
+
+        object.__setattr__(self, "ds", ds)
+
+        return indices
+
+
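The proximity test hinges on a great-circle distance between each river mouth and every grid point. `gc_dist` comes from the package's utilities; a haversine stand-in shows the idea on toy coordinates (all values hypothetical):

    import numpy as np

    def haversine(lon1, lat1, lon2, lat2, radius=6371e3):
        # Great-circle distance in meters (stand-in for gc_dist)
        lon1, lat1, lon2, lat2 = map(np.radians, (lon1, lat1, lon2, lat2))
        a = (
            np.sin((lat2 - lat1) / 2) ** 2
            + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
        )
        return 2 * radius * np.arcsin(np.sqrt(a))

    # Distance from one river mouth to a handful of grid points; the river
    # is kept if the nearest grid point lies within dx meters
    grid_lon = np.array([-50.0, -49.5, -49.0])
    grid_lat = np.array([0.0, 0.1, 0.2])
    dist = haversine(grid_lon, grid_lat, -49.6, 0.05)
    dx = 50e3
    print(dist.min() < dx)  # True -> this river would be retained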
+@dataclass(frozen=True, kw_only=True)
+class DaiRiverDataset(RiverDataset):
+    """Represents river data from the Dai river dataset.
+
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]], optional
+        The path to the Dai River dataset file. If not provided, the dataset will be downloaded
+        automatically via the `pooch` library.
+    start_time : datetime
+        The start time for selecting relevant data.
+    end_time : datetime
+        The end time for selecting relevant data.
+    dim_names : Dict[str, str], optional
+        Dictionary specifying the names of dimensions in the dataset.
+    var_names : Dict[str, str], optional
+        Dictionary of variable names that are required in the dataset.
+    opt_var_names : Dict[str, str], optional
+        Dictionary of variable names that are optional in the dataset.
+    climatology : bool
+        Indicates whether the dataset is climatological. Defaults to False.
+
+    Attributes
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset containing the forcing data on its original grid.
+    """
+
+    filename: Union[str, Path, List[Union[str, Path]]] = field(
+        default_factory=lambda: download_river_data("dai_trenberth_may2019.nc")
+    )
+    start_time: datetime
+    end_time: datetime
+    dim_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "station": "station",
+            "time": "time",
+        }
+    )
+    var_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "latitude": "lat_mou",
+            "longitude": "lon_mou",
+            "flux": "FLOW",
+            "ratio": "ratio_m2s",
+            "name": "riv_name",
+        }
+    )
+    opt_var_names: Dict[str, str] = field(
+        default_factory=lambda: {
+            "vol": "vol_stn",
+        }
+    )
+    climatology: Optional[bool] = False
+    ds: xr.Dataset = field(init=False, repr=False)
+
+    def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
+        """Adds time information to the dataset by decoding the raw time values.
+
+        The raw Dai times are stored as floats of the form YYYYMM; this method
+        decodes them into `datetime` objects (with the day fixed to mid-month) and
+        assigns the result to the time coordinate.
+
+        Parameters
+        ----------
+        ds : xr.Dataset
+            The input dataset to which time information will be added.
+
+        Returns
+        -------
+        xr.Dataset
+            The dataset with decoded time information.
+        """
+        time_dim = self.dim_names["time"]
+
+        # Extract the 'time' variable as a numpy array
+        time_vals = ds[time_dim].values
+
+        # Decode YYYYMM-style float time values into integer year and month
+        year = np.round(time_vals * 1e-2).astype(int)
+        month = np.round((time_vals * 1e-2 - year) * 1e2).astype(int)
+
+        # Convert to datetime objects (the day is arbitrarily set to mid-month)
+        dates = [datetime(year=i, month=m, day=15) for i, m in zip(year, month)]
+
+        ds[time_dim] = dates
+
+        return ds
+
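A rough usage sketch (hypothetical time range; omitting `filename` triggers the `pooch` download, so this assumes network access on first use):

    from datetime import datetime

    dai = DaiRiverDataset(
        start_time=datetime(2000, 1, 1),
        end_time=datetime(2000, 12, 31),
    )
    print(dai.ds[dai.var_names["flux"]].sizes)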
+
+
+# shared functions
+
+
+def _load_data(filename, dim_names, use_dask, decode_times=True):
+    """Load dataset from the specified file.
+
+    Parameters
+    ----------
+    filename : Union[str, Path, List[Union[str, Path]]]
+        The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
+        or a list of strings or Path objects containing multiple files.
+    dim_names : Dict[str, str]
+        Dictionary specifying the names of dimensions in the dataset.
+    use_dask : bool
+        Indicates whether to use dask for chunking. If True, data is loaded lazily with dask; if False, data is loaded eagerly.
+    decode_times : bool, optional
+        If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
+        Defaults to True.
+
+    Returns
+    -------
+    ds : xr.Dataset
+        The loaded xarray Dataset containing the forcing data.
+
+    Raises
+    ------
+    FileNotFoundError
+        If the specified file does not exist.
+    ValueError
+        If a list of files is provided but dim_names["time"] is not available.
+    """
+    # Precompile the regex for matching wildcard characters
+    wildcard_regex = re.compile(r"[\*\?\[\]]")
+
+    # Convert Path objects to strings
+    if isinstance(filename, (str, Path)):
+        filename_str = str(filename)
+    elif isinstance(filename, list):
+        filename_str = [str(f) for f in filename]
+    else:
+        raise ValueError("filename must be a string, Path, or a list of strings/Paths.")
+
+    # Handle the case when filename is a string
+    contains_wildcard = False
+    if isinstance(filename_str, str):
+        contains_wildcard = bool(wildcard_regex.search(filename_str))
+        if contains_wildcard:
+            matching_files = glob.glob(filename_str)
+            if not matching_files:
+                raise FileNotFoundError(
+                    f"No files found matching the pattern '{filename_str}'."
+                )
+        else:
+            matching_files = [filename_str]
+
+    # Handle the case when filename is a list
+    elif isinstance(filename_str, list):
+        contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
+        if contains_wildcard:
+            matching_files = []
+            for f in filename_str:
+                files = glob.glob(f)
+                if not files:
+                    raise FileNotFoundError(
+                        f"No files found matching the pattern '{f}'."
+                    )
+                matching_files.extend(files)
+        else:
+            matching_files = filename_str
+
+    # Check if the time dimension is available when multiple files are provided
+    if isinstance(filename_str, list) and "time" not in dim_names:
+        raise ValueError(
+            "A list of files is provided, but time dimension is not available. "
+            "A time dimension must be available to concatenate the files."
+        )
+
+    # Determine the kwargs for combining datasets
+    if contains_wildcard or len(matching_files) == 1:
+        # If there is a wildcard or just one file, use by_coords
+        kwargs = {"combine": "by_coords"}
+    else:
+        # Otherwise, use nested combine based on time
+        kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
+
+    # Base kwargs used for dataset combination
+    combine_kwargs = {
+        "coords": "minimal",
+        "compat": "override",
+        "combine_attrs": "override",
+    }
+
+    if use_dask:
+        chunks = {
+            dim_names["latitude"]: -1,
+            dim_names["longitude"]: -1,
+        }
+        if "depth" in dim_names:
+            chunks[dim_names["depth"]] = -1
+        if "time" in dim_names:
+            chunks[dim_names["time"]] = 1
+
+        ds = xr.open_mfdataset(
+            matching_files,
+            decode_times=decode_times,
+            chunks=chunks,
+            **combine_kwargs,
+            **kwargs,
+        )
+    else:
+        ds_list = []
+        for file in matching_files:
+            ds = xr.open_dataset(file, decode_times=decode_times, chunks=None)
+            ds_list.append(ds)
+
+        if kwargs["combine"] == "by_coords":
+            ds = xr.combine_by_coords(ds_list, **combine_kwargs)
+        elif kwargs["combine"] == "nested":
+            ds = xr.combine_nested(
+                ds_list, concat_dim=kwargs["concat_dim"], **combine_kwargs
+            )
+
+    if "time" in dim_names and dim_names["time"] not in ds.dims:
+        ds = ds.expand_dims(dim_names["time"])
+
+    return ds
+
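As used by `RiverDataset.load_data` above, a single local file can be opened eagerly with times left encoded (path hypothetical; assumes the file exists locally):

    ds = _load_data(
        "dai_trenberth_may2019.nc",
        dim_names={"station": "station", "time": "time"},
        use_dask=False,
        decode_times=False,
    )

With `use_dask=True`, `dim_names` must also map "latitude" and "longitude", since those entries are used to build the chunking dictionary.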
+def _check_dataset(
+    ds: xr.Dataset,
+    dim_names: Dict[str, str],
+    var_names: Dict[str, str],
+    opt_var_names: Optional[Dict[str, str]] = None,
+) -> None:
+    """Check if the dataset contains the specified variables and dimensions.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The xarray Dataset to check.
+    dim_names : Dict[str, str]
+        Dictionary specifying the names of dimensions in the dataset.
+    var_names : Dict[str, str]
+        Dictionary of variable names that are required in the dataset.
+    opt_var_names : Optional[Dict[str, str]], optional
+        Dictionary of optional variable names.
+        These variables are not strictly required, and the function will not raise an error if they are missing.
+        Default is None, meaning no optional variables are considered.
+
+    Raises
+    ------
+    ValueError
+        If the dataset does not contain the specified variables or dimensions.
+    """
+    missing_dims = [dim for dim in dim_names.values() if dim not in ds.dims]
+    if missing_dims:
+        raise ValueError(
+            f"Dataset does not contain all required dimensions. The following dimensions are missing: {missing_dims}"
+        )
+
+    missing_vars = [var for var in var_names.values() if var not in ds.data_vars]
+    if missing_vars:
+        raise ValueError(
+            f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
+        )
+
+    if opt_var_names:
+        missing_optional_vars = [
+            var for var in opt_var_names.values() if var not in ds.data_vars
+        ]
+        if missing_optional_vars:
+            logging.warning(
+                f"Optional variables missing (but not critical): {missing_optional_vars}"
+            )
+
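A quick failure-mode sketch (toy dataset; names follow the Dai defaults):

    import xarray as xr

    ds = xr.Dataset({"FLOW": ("time", [1.0, 2.0])})
    try:
        _check_dataset(
            ds,
            dim_names={"time": "time", "station": "station"},
            var_names={"flux": "FLOW", "name": "riv_name"},
        )
    except ValueError as err:
        print(err)  # reports the missing 'station' dimension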
+def _select_relevant_times(
+    ds, time_dim, start_time=None, end_time=None, climatology=False
+) -> xr.Dataset:
+    """Select a subset of the dataset based on the specified time range.
+
+    This function filters the dataset to include all records between `start_time` and `end_time`.
+    Additionally, it ensures that one record at or before `start_time` and one record at or
+    after `end_time` are included, even if they fall outside the strict time range.
+
+    If no `end_time` is specified, the function will select the time range
+    [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The input dataset to be filtered. Must contain a time dimension.
+    time_dim : str
+        Name of the time dimension.
+    start_time : Optional[datetime], optional
+        The start time for selecting relevant data. If not provided, the data is not filtered by start time.
+    end_time : Optional[datetime], optional
+        The end time for selecting relevant data. If not provided, only data at the start_time is selected if start_time is provided,
+        or no filtering is applied if start_time is not provided.
+    climatology : bool
+        Indicates whether the dataset is climatological. Defaults to False.
+
+    Returns
+    -------
+    xr.Dataset
+        A dataset filtered to the specified time range, including the closest entries
+        at or before `start_time` and at or after `end_time` if applicable.
+
+    Raises
+    ------
+    ValueError
+        If no matching times are found between `start_time` and `start_time + 24 hours`.
+
+    Warns
+    -----
+    UserWarning
+        If the dataset contains exactly 12 time steps but the climatology flag is not set.
+        This may indicate that the dataset represents climatology data.
+
+    UserWarning
+        If no records at or before `start_time` or no records at or after `end_time` are found.
+
+    UserWarning
+        If the dataset does not contain any time dimension or the time dimension is incorrectly named.
+
+    Notes
+    -----
+    - If the `climatology` flag is set and `end_time` is not provided, the function will
+      interpolate initial conditions from the climatology data.
+    - If the dataset uses `cftime` datetime objects, these will be converted to standard
+      `np.datetime64` objects before filtering.
+    """
+    if time_dim in ds.variables:
+        if climatology:
+            if len(ds[time_dim]) != 12:
+                raise ValueError(
+                    f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
+                )
+            if not end_time:
+                # Interpolate from climatology for initial conditions
+                ds = interpolate_from_climatology(ds, time_dim, start_time)
+        else:
+            time_type = get_time_type(ds[time_dim])
+            if time_type == "int":
+                raise ValueError(
+                    "The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
+                )
+            if time_type == "cftime":
+                ds = ds.assign_coords(
+                    {time_dim: convert_cftime_to_datetime(ds[time_dim])}
+                )
+            if end_time:
+                # Identify records before or at start_time
+                before_start = ds[time_dim] <= np.datetime64(start_time)
+                if before_start.any():
+                    closest_before_start = (
+                        ds[time_dim].where(before_start, drop=True).max()
+                    )
+                else:
+                    logging.warning("No records found at or before the start_time.")
+                    closest_before_start = ds[time_dim].min()
+
+                # Identify records after or at end_time
+                after_end = ds[time_dim] >= np.datetime64(end_time)
+                if after_end.any():
+                    closest_after_end = ds[time_dim].where(after_end, drop=True).min()
+                else:
+                    logging.warning("No records found at or after the end_time.")
+                    closest_after_end = ds[time_dim].max()
+
+                # Select records within the time range and add the closest before/after
+                within_range = (ds[time_dim] > np.datetime64(start_time)) & (
+                    ds[time_dim] < np.datetime64(end_time)
+                )
+                selected_times = ds[time_dim].where(
+                    within_range
+                    | (ds[time_dim] == closest_before_start)
+                    | (ds[time_dim] == closest_after_end),
+                    drop=True,
+                )
+                ds = ds.sel({time_dim: selected_times})
+            else:
+                # Look in the time range [start_time, start_time + 24h]
+                end_time = start_time + timedelta(days=1)
+                times = (np.datetime64(start_time) <= ds[time_dim]) & (
+                    ds[time_dim] < np.datetime64(end_time)
+                )
+                if np.all(~times):
+                    raise ValueError(
+                        f"The dataset does not contain any time entries between the specified start_time: {start_time} "
+                        f"and {start_time + timedelta(hours=24)}. "
+                        "Please ensure the dataset includes time entries for that range."
+                    )
+
+                ds = ds.where(times, drop=True)
+                if ds.sizes[time_dim] > 1:
+                    # Pick the time closest to start_time
+                    ds = ds.isel({time_dim: 0})
+                logging.info(
+                    f"Selected time entry closest to the specified start_time ({start_time}) within the range [{start_time}, {start_time + timedelta(hours=24)}]: {ds[time_dim].values}"
+                )
+    else:
+        logging.warning(
+            "Dataset does not contain any time information. Please check if the time dimension "
+            "is correctly named or if the dataset includes time data."
+        )
+
+    return ds
+
+
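A toy run of the bracketing logic (hypothetical monthly series):

    from datetime import datetime

    import numpy as np
    import pandas as pd
    import xarray as xr

    time = pd.date_range("2000-01-01", periods=10, freq="MS")
    ds = xr.Dataset({"FLOW": ("time", np.arange(10))}, coords={"time": time})

    subset = _select_relevant_times(
        ds, "time", start_time=datetime(2000, 2, 14), end_time=datetime(2000, 5, 20)
    )
    # Keeps Mar-May plus the bracketing Feb 1 and Jun 1 records
    print(subset.time.values)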
+def decode_string(byte_array):
+    """Decode a per-character byte array into a single string, skipping blanks and
+    invalid entries."""
+    # Decode each byte and handle errors with 'ignore'
+    decoded_string = "".join(
+        [
+            x.decode("utf-8", errors="ignore")  # Ignore invalid byte sequences
+            for x in byte_array.values
+            if isinstance(x, bytes) and x != b" " and x is not np.nan
+        ]
+    )
+
+    return decoded_string
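For instance, decoding a per-character byte array of the kind stored in the raw station file (values hypothetical):

    import numpy as np
    import xarray as xr

    chars = xr.DataArray(np.array([b"A", b"m", b"a", b"z", b"o", b"n"], dtype=object))
    print(decode_string(chars))  # Amazon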