roms-tools 3.1.2__py3-none-any.whl → 3.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221)
  1. roms_tools/__init__.py +3 -0
  2. roms_tools/analysis/cdr_analysis.py +203 -0
  3. roms_tools/analysis/cdr_ensemble.py +198 -0
  4. roms_tools/analysis/roms_output.py +80 -46
  5. roms_tools/data/grids/GLORYS_global_grid.nc +0 -0
  6. roms_tools/download.py +4 -0
  7. roms_tools/plot.py +113 -51
  8. roms_tools/setup/boundary_forcing.py +45 -20
  9. roms_tools/setup/cdr_forcing.py +122 -8
  10. roms_tools/setup/cdr_release.py +161 -8
  11. roms_tools/setup/grid.py +150 -141
  12. roms_tools/setup/initial_conditions.py +113 -48
  13. roms_tools/setup/{datasets.py → lat_lon_datasets.py} +443 -938
  14. roms_tools/setup/mask.py +63 -7
  15. roms_tools/setup/nesting.py +314 -117
  16. roms_tools/setup/river_datasets.py +527 -0
  17. roms_tools/setup/river_forcing.py +46 -20
  18. roms_tools/setup/surface_forcing.py +7 -9
  19. roms_tools/setup/tides.py +2 -3
  20. roms_tools/setup/topography.py +8 -10
  21. roms_tools/setup/utils.py +396 -23
  22. roms_tools/tests/test_analysis/test_cdr_analysis.py +144 -0
  23. roms_tools/tests/test_analysis/test_cdr_ensemble.py +202 -0
  24. roms_tools/tests/test_analysis/test_roms_output.py +61 -3
  25. roms_tools/tests/test_setup/test_boundary_forcing.py +54 -52
  26. roms_tools/tests/test_setup/test_cdr_forcing.py +54 -0
  27. roms_tools/tests/test_setup/test_cdr_release.py +118 -1
  28. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_east/c/0/0/0 +0 -0
  29. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_north/c/0/0/0 +0 -0
  30. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_ALT_CO2_west/c/0/0/0 +0 -0
  31. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_east/c/0/0/0 +0 -0
  32. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_north/c/0/0/0 +0 -0
  33. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/ALK_west/c/0/0/0 +0 -0
  34. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_east/c/0/0/0 +0 -0
  35. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_north/c/0/0/0 +0 -0
  36. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_ALT_CO2_west/c/0/0/0 +0 -0
  37. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_east/c/0/0/0 +0 -0
  38. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_north/c/0/0/0 +0 -0
  39. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DIC_west/c/0/0/0 +0 -0
  40. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_east/c/0/0/0 +0 -0
  41. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_north/c/0/0/0 +0 -0
  42. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOC_west/c/0/0/0 +0 -0
  43. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_east/c/0/0/0 +0 -0
  44. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_north/c/0/0/0 +0 -0
  45. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOCr_west/c/0/0/0 +0 -0
  46. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_east/c/0/0/0 +0 -0
  47. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_north/c/0/0/0 +0 -0
  48. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DON_west/c/0/0/0 +0 -0
  49. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_east/c/0/0/0 +0 -0
  50. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_north/c/0/0/0 +0 -0
  51. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DONr_west/c/0/0/0 +0 -0
  52. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_east/c/0/0/0 +0 -0
  53. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_north/c/0/0/0 +0 -0
  54. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOP_west/c/0/0/0 +0 -0
  55. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_east/c/0/0/0 +0 -0
  56. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_north/c/0/0/0 +0 -0
  57. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/DOPr_west/c/0/0/0 +0 -0
  58. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_east/c/0/0/0 +0 -0
  59. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_north/c/0/0/0 +0 -0
  60. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Fe_west/c/0/0/0 +0 -0
  61. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_east/c/0/0/0 +0 -0
  62. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_north/c/0/0/0 +0 -0
  63. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/Lig_west/c/0/0/0 +0 -0
  64. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_east/c/0/0/0 +0 -0
  65. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_north/c/0/0/0 +0 -0
  66. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NH4_west/c/0/0/0 +0 -0
  67. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_east/c/0/0/0 +0 -0
  68. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_north/c/0/0/0 +0 -0
  69. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/NO3_west/c/0/0/0 +0 -0
  70. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_east/c/0/0/0 +0 -0
  71. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_north/c/0/0/0 +0 -0
  72. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/O2_west/c/0/0/0 +0 -0
  73. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_east/c/0/0/0 +0 -0
  74. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_north/c/0/0/0 +0 -0
  75. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/PO4_west/c/0/0/0 +0 -0
  76. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_east/c/0/0/0 +0 -0
  77. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_north/c/0/0/0 +0 -0
  78. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/SiO3_west/c/0/0/0 +0 -0
  79. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_east/c/0/0/0 +0 -0
  80. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_north/c/0/0/0 +0 -0
  81. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatC_west/c/0/0/0 +0 -0
  82. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_east/c/0/0/0 +0 -0
  83. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_north/c/0/0/0 +0 -0
  84. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatChl_west/c/0/0/0 +0 -0
  85. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_east/c/0/0/0 +0 -0
  86. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_north/c/0/0/0 +0 -0
  87. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatFe_west/c/0/0/0 +0 -0
  88. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_east/c/0/0/0 +0 -0
  89. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_north/c/0/0/0 +0 -0
  90. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatP_west/c/0/0/0 +0 -0
  91. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_east/c/0/0/0 +0 -0
  92. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_north/c/0/0/0 +0 -0
  93. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diatSi_west/c/0/0/0 +0 -0
  94. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_east/c/0/0/0 +0 -0
  95. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_north/c/0/0/0 +0 -0
  96. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazC_west/c/0/0/0 +0 -0
  97. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_east/c/0/0/0 +0 -0
  98. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_north/c/0/0/0 +0 -0
  99. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazChl_west/c/0/0/0 +0 -0
  100. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_east/c/0/0/0 +0 -0
  101. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_north/c/0/0/0 +0 -0
  102. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazFe_west/c/0/0/0 +0 -0
  103. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_east/c/0/0/0 +0 -0
  104. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_north/c/0/0/0 +0 -0
  105. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/diazP_west/c/0/0/0 +0 -0
  106. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_east/c/0/0/0 +0 -0
  107. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_north/c/0/0/0 +0 -0
  108. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spC_west/c/0/0/0 +0 -0
  109. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_east/c/0/0/0 +0 -0
  110. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_north/c/0/0/0 +0 -0
  111. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spCaCO3_west/c/0/0/0 +0 -0
  112. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_east/c/0/0/0 +0 -0
  113. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_north/c/0/0/0 +0 -0
  114. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spChl_west/c/0/0/0 +0 -0
  115. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_east/c/0/0/0 +0 -0
  116. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_north/c/0/0/0 +0 -0
  117. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spFe_west/c/0/0/0 +0 -0
  118. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_east/c/0/0/0 +0 -0
  119. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_north/c/0/0/0 +0 -0
  120. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/spP_west/c/0/0/0 +0 -0
  121. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zarr.json +406 -406
  122. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_east/c/0/0/0 +0 -0
  123. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_north/c/0/0/0 +0 -0
  124. roms_tools/tests/test_setup/test_data/bgc_boundary_forcing_from_climatology.zarr/zooC_west/c/0/0/0 +0 -0
  125. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_east/c/0/0/0 +0 -0
  126. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_north/c/0/0/0 +0 -0
  127. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_south/c/0/0/0 +0 -0
  128. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_west/c/0/0/0 +0 -0
  129. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_east/c/0/0/0 +0 -0
  130. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_north/c/0/0/0 +0 -0
  131. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_south/c/0/0/0 +0 -0
  132. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_west/c/0/0/0 +0 -0
  133. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_east/c/0/0/0 +0 -0
  134. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_north/c/0/0/0 +0 -0
  135. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_south/c/0/0/0 +0 -0
  136. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_west/c/0/0/0 +0 -0
  137. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_east/c/0/0 +0 -0
  138. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_north/c/0/0 +0 -0
  139. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_south/c/0/0 +0 -0
  140. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_west/c/0/0 +0 -0
  141. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_east/c/0/0/0 +0 -0
  142. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_north/c/0/0/0 +0 -0
  143. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_south/c/0/0/0 +0 -0
  144. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_west/c/0/0/0 +0 -0
  145. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_east/c/0/0 +0 -0
  146. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_north/c/0/0 +0 -0
  147. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_south/c/0/0 +0 -0
  148. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_west/c/0/0 +0 -0
  149. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zarr.json +182 -182
  150. roms_tools/tests/test_setup/test_data/grid.zarr/h/c/0/0 +0 -0
  151. roms_tools/tests/test_setup/test_data/grid.zarr/zarr.json +191 -191
  152. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/h/c/0/0 +0 -0
  153. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/zarr.json +210 -210
  154. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/ALK/c/0/0/0/0 +0 -0
  155. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/ALK_ALT_CO2/c/0/0/0/0 +0 -0
  156. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DIC/c/0/0/0/0 +0 -0
  157. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DIC_ALT_CO2/c/0/0/0/0 +0 -0
  158. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DOC/c/0/0/0/0 +0 -0
  159. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DOCr/c/0/0/0/0 +0 -0
  160. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DON/c/0/0/0/0 +0 -0
  161. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DONr/c/0/0/0/0 +0 -0
  162. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DOP/c/0/0/0/0 +0 -0
  163. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/DOPr/c/0/0/0/0 +0 -0
  164. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/Fe/c/0/0/0/0 +0 -0
  165. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/Lig/c/0/0/0/0 +0 -0
  166. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/NH4/c/0/0/0/0 +0 -0
  167. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/NO3/c/0/0/0/0 +0 -0
  168. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/O2/c/0/0/0/0 +0 -0
  169. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/PO4/c/0/0/0/0 +0 -0
  170. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/SiO3/c/0/0/0/0 +0 -0
  171. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diatC/c/0/0/0/0 +0 -0
  172. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diatChl/c/0/0/0/0 +0 -0
  173. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diatFe/c/0/0/0/0 +0 -0
  174. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diatP/c/0/0/0/0 +0 -0
  175. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diatSi/c/0/0/0/0 +0 -0
  176. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diazC/c/0/0/0/0 +0 -0
  177. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diazChl/c/0/0/0/0 +0 -0
  178. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diazFe/c/0/0/0/0 +0 -0
  179. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diazP/c/0/0/0/0 +0 -0
  180. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/salt/c/0/0/0/0 +0 -0
  181. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spC/c/0/0/0/0 +0 -0
  182. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spCaCO3/c/0/0/0/0 +0 -0
  183. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spChl/c/0/0/0/0 +0 -0
  184. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spFe/c/0/0/0/0 +0 -0
  185. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spP/c/0/0/0/0 +0 -0
  186. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/temp/c/0/0/0/0 +0 -0
  187. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/u/c/0/0/0/0 +0 -0
  188. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/ubar/c/0/0/0 +0 -0
  189. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/v/c/0/0/0/0 +0 -0
  190. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/vbar/c/0/0/0 +0 -0
  191. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/zarr.json +182 -182
  192. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/zooC/c/0/0/0/0 +0 -0
  193. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/salt/c/0/0/0/0 +0 -0
  194. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/temp/c/0/0/0/0 +0 -0
  195. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/u/c/0/0/0/0 +0 -0
  196. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/ubar/c/0/0/0 +0 -0
  197. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/v/c/0/0/0/0 +0 -0
  198. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/vbar/c/0/0/0 +0 -0
  199. roms_tools/tests/test_setup/test_data/initial_conditions_with_unified_bgc_from_climatology.zarr/zarr.json +187 -187
  200. roms_tools/tests/test_setup/test_data/tidal_forcing.zarr/u_Im/c/0/0/0 +0 -0
  201. roms_tools/tests/test_setup/test_data/tidal_forcing.zarr/u_Re/c/0/0/0 +0 -0
  202. roms_tools/tests/test_setup/test_data/tidal_forcing.zarr/v_Im/c/0/0/0 +0 -0
  203. roms_tools/tests/test_setup/test_data/tidal_forcing.zarr/v_Re/c/0/0/0 +0 -0
  204. roms_tools/tests/test_setup/test_data/tidal_forcing.zarr/zarr.json +66 -66
  205. roms_tools/tests/test_setup/test_grid.py +236 -115
  206. roms_tools/tests/test_setup/test_initial_conditions.py +94 -41
  207. roms_tools/tests/test_setup/{test_datasets.py → test_lat_lon_datasets.py} +409 -100
  208. roms_tools/tests/test_setup/test_nesting.py +119 -31
  209. roms_tools/tests/test_setup/test_river_datasets.py +48 -0
  210. roms_tools/tests/test_setup/test_surface_forcing.py +2 -1
  211. roms_tools/tests/test_setup/test_utils.py +92 -2
  212. roms_tools/tests/test_setup/utils.py +71 -0
  213. roms_tools/tests/test_tiling/test_join.py +241 -0
  214. roms_tools/tests/test_utils.py +139 -17
  215. roms_tools/tiling/join.py +189 -0
  216. roms_tools/utils.py +131 -99
  217. {roms_tools-3.1.2.dist-info → roms_tools-3.3.0.dist-info}/METADATA +12 -2
  218. {roms_tools-3.1.2.dist-info → roms_tools-3.3.0.dist-info}/RECORD +221 -211
  219. {roms_tools-3.1.2.dist-info → roms_tools-3.3.0.dist-info}/WHEEL +0 -0
  220. {roms_tools-3.1.2.dist-info → roms_tools-3.3.0.dist-info}/licenses/LICENSE +0 -0
  221. {roms_tools-3.1.2.dist-info → roms_tools-3.3.0.dist-info}/top_level.txt +0 -0
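The diff below covers the renamed module `roms_tools/setup/{datasets.py → lat_lon_datasets.py}` (entry 13 above), in which the `Dataset` base class becomes `LatLonDataset` and the river classes move out to the new `roms_tools/setup/river_datasets.py` (entry 16). A minimal migration sketch for downstream code, assuming the class names shown in the diff are the publicly importable ones:

```python
# roms-tools 3.1.2 (old):
#   from roms_tools.setup.datasets import Dataset, GLORYSDataset
# roms-tools 3.3.0 (new), per the rename in this diff:
from roms_tools.setup.lat_lon_datasets import GLORYSDataset, LatLonDataset

# River classes were relocated, not removed (assumed export name):
from roms_tools.setup.river_datasets import RiverDataset
```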
@@ -1,13 +1,16 @@
+ from __future__ import annotations
+
  import importlib.util
- import logging
- import time
- from collections import Counter, defaultdict
- from collections.abc import Callable
+ import typing
+ from collections.abc import Callable, Mapping
  from dataclasses import dataclass, field
- from datetime import datetime, timedelta
+ from datetime import datetime
  from pathlib import Path
  from types import ModuleType
- from typing import ClassVar
+ from typing import Any, ClassVar, Literal, cast
+
+ if typing.TYPE_CHECKING:
+     from roms_tools.setup.grid import Grid

  import numpy as np
  import xarray as xr
@@ -15,27 +18,38 @@ import xarray as xr
  from roms_tools.constants import R_EARTH
  from roms_tools.download import (
      download_correction_data,
-     download_river_data,
      download_sal_data,
      download_topo,
  )
  from roms_tools.setup.fill import LateralFill
  from roms_tools.setup.utils import (
+     Timed,
      assign_dates_to_climatology,
-     convert_cftime_to_datetime,
-     gc_dist,
-     get_time_type,
+     check_dataset,
+     get_target_coords,
      interpolate_cyclic_time,
-     interpolate_from_climatology,
      one_dim_fill,
+     select_relevant_times,
  )
- from roms_tools.utils import _get_pkg_error_msg, _has_gcsfs, _load_data
+ from roms_tools.utils import get_dask_chunks, get_pkg_error_msg, has_gcsfs, load_data

- # lat-lon datasets
+ TConcatEndTypes = Literal["lower", "upper", "both"]
+ REPO_ROOT = Path(__file__).resolve().parents[2]
+ GLORYS_GLOBAL_GRID_PATH = (
+     REPO_ROOT / "roms_tools" / "data" / "grids" / "GLORYS_global_grid.nc"
+ )
+ DEFAULT_NR_BUFFER_POINTS = (
+     20  # Default number of buffer points for subdomain selection.
+ )
+ # Balances performance and accuracy:
+ # - Too many points → more expensive computations
+ # - Too few points → potential boundary artifacts when lateral refill is performed
+ # See discussion: https://github.com/CWorthy-ocean/roms-tools/issues/153
+ # This default will be applied consistently across all datasets requiring lateral fill.


  @dataclass(kw_only=True)
- class Dataset:
+ class LatLonDataset:
      """Represents forcing data on original grid.

      Parameters
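To make the `DEFAULT_NR_BUFFER_POINTS` trade-off concrete: the buffer is converted into a degree margin by multiplying it with the source grid's resolution, and that margin pads the bounding box used for subdomain selection (see `choose_subdomain` further down). A self-contained sketch, with a GLORYS-like 1/12-degree spacing and the target extent as assumed example values:

```python
lat_min, lat_max = 30.0, 45.0        # hypothetical target-grid latitude extent
resolution = 1.0 / 12.0              # degrees; GLORYS-like spacing (assumed)
buffer_points = 20                   # DEFAULT_NR_BUFFER_POINTS
margin = resolution * buffer_points  # ~1.67 degrees padded onto each side
lat_slice = slice(lat_min - margin, lat_max + margin)
```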
@@ -47,7 +61,7 @@ class Dataset:
          Start time for selecting relevant data. If not provided, no time-based filtering is applied.
      end_time : Optional[datetime], optional
          End time for selecting relevant data. If not provided, the dataset selects the time entry
-         closest to `start_time` within the range `[start_time, start_time + 24 hours]`.
+         closest to `start_time` within the range `[start_time, start_time + 24 hours)`.
          If `start_time` is also not provided, no time-based filtering is applied.
      dim_names: Dict[str, str], optional
          Dictionary specifying the names of dimensions in the dataset.
@@ -62,8 +76,19 @@ class Dataset:
          Indicates whether land values require lateral filling. If `True`, ocean values will be extended into land areas
          to replace NaNs or non-ocean values (such as atmospheric values in ERA5 data). If `False`, it is assumed that
          land values are already correctly assigned, and lateral filling will be skipped. Defaults to `True`.
-     use_dask: bool
+     use_dask: bool, optional
          Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. Defaults to False.
+     read_zarr: bool, optional
+         If True, use the zarr engine to read the dataset, and don't use mfdataset.
+         Defaults to False.
+     allow_flex_time: bool, optional
+         Controls how strictly the dataset selects a time entry when `end_time` is not provided (relevant for initial conditions):
+
+         - If False (default): requires an exact match to `start_time`. Raises a ValueError if no match exists.
+         - If True: allows a +24h search window after `start_time` and selects the closest available
+           time entry within that window. Raises a ValueError if none are found.
+
+         Only used when `end_time` is None. Has no effect otherwise.
      apply_post_processing: bool
          Indicates whether to post-process the dataset for futher use. Defaults to True.

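In other words, `allow_flex_time` only matters in the initial-conditions case, where a single time entry is picked. A usage sketch (the file path is hypothetical, and constructor arguments beyond those shown rely on the dataclass defaults):

```python
from datetime import datetime
from roms_tools.setup.lat_lon_datasets import GLORYSDataset

# Strict (default): the dataset must contain exactly datetime(2012, 1, 1),
# otherwise a ValueError is raised.
strict = GLORYSDataset(filename="glorys.nc", start_time=datetime(2012, 1, 1))

# Flexible: the closest entry within [start_time, start_time + 24h) is used.
flexible = GLORYSDataset(
    filename="glorys.nc",
    start_time=datetime(2012, 1, 1),
    allow_flex_time=True,
)
```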
@@ -94,14 +119,15 @@ class Dataset:
          }
      )
      var_names: dict[str, str]
-     opt_var_names: dict[str, str] | None = field(default_factory=dict)
-     climatology: bool | None = False
+     opt_var_names: dict[str, str] = field(default_factory=dict)
+     climatology: bool = False
      needs_lateral_fill: bool | None = True
-     use_dask: bool | None = False
+     use_dask: bool = False
+     read_zarr: bool = False
+     allow_flex_time: bool = False
      apply_post_processing: bool | None = True
-     read_zarr: bool | None = False
-     ds_loader_fn: Callable[[], xr.Dataset] | None = None

+     ds_loader_fn: Callable[[], xr.Dataset] | None = None
      is_global: bool = field(init=False, repr=False)
      ds: xr.Dataset = field(init=False, repr=False)

@@ -172,17 +198,17 @@ class Dataset:
          ValueError
              If a list of files is provided but self.dim_names["time"] is not available or use_dask=False.
          """
-         ds = _load_data(
-             self.filename,
-             self.dim_names,
-             self.use_dask or False,
-             read_zarr=self.read_zarr or False,
+         ds = load_data(
+             filename=self.filename,
+             dim_names=self.dim_names,
+             use_dask=self.use_dask,
+             read_zarr=self.read_zarr,
              ds_loader_fn=self.ds_loader_fn,
          )

          return ds

-     def clean_up(self, ds: xr.Dataset, **kwargs) -> xr.Dataset:
+     def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
          """Dummy method to be overridden by child classes to clean up the dataset.

          This method is intended as a placeholder and should be implemented in subclasses
@@ -213,9 +239,9 @@ class Dataset:
          ValueError
              If the dataset does not contain the specified variables or dimensions.
          """
-         _check_dataset(ds, self.dim_names, self.var_names)
+         check_dataset(ds, self.dim_names, self.var_names)

-     def select_relevant_fields(self, ds) -> xr.Dataset:
+     def select_relevant_fields(self, ds: xr.Dataset) -> xr.Dataset:
          """Selects and returns a subset of the dataset containing only the variables
          specified in `self.var_names`.

@@ -258,7 +284,7 @@ class Dataset:
          """
          return ds

-     def select_relevant_times(self, ds) -> xr.Dataset:
+     def select_relevant_times(self, ds: xr.Dataset) -> xr.Dataset:
          """Select a subset of the dataset based on the specified time range.

          This method filters the dataset to include all records between `start_time` and `end_time`.
@@ -266,7 +292,7 @@ class Dataset:
          after `end_time` are included, even if they fall outside the strict time range.

          If no `end_time` is specified, the method will select the time range of
-         [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
+         [start_time, start_time + 24 hours) and return the closest time entry to `start_time` within that range.

          Parameters
          ----------
@@ -305,8 +331,17 @@ class Dataset:
          """
          time_dim = self.dim_names["time"]

-         ds = _select_relevant_times(
-             ds, time_dim, self.start_time, self.end_time, self.climatology
+         # Ensure start_time is not None for type safety
+         if self.start_time is None:
+             raise ValueError("select_relevant_times called but start_time is None.")
+
+         ds = select_relevant_times(
+             ds,
+             time_dim,
+             self.start_time,
+             self.end_time,
+             self.climatology,
+             self.allow_flex_time,
          )

          return ds
@@ -353,7 +388,7 @@ class Dataset:

          return ds

-     def infer_horizontal_resolution(self, ds: xr.Dataset):
+     def infer_horizontal_resolution(self, ds: xr.Dataset) -> None:
          """Estimate and set the average horizontal resolution of a dataset based on
          latitude and longitude spacing.

@@ -381,7 +416,7 @@ class Dataset:
          # Set the computed resolution as an attribute
          self.resolution = resolution

-     def compute_minimal_grid_spacing(self, ds: xr.Dataset):
+     def compute_minimal_grid_spacing(self, ds: xr.Dataset) -> float:
          """Compute the minimal grid spacing in a dataset based on latitude and longitude
          spacing, considering Earth's radius.

@@ -443,7 +478,12 @@ class Dataset:

          return is_global

-     def concatenate_longitudes(self, ds, end="upper", verbose=False):
+     def concatenate_longitudes(
+         self,
+         ds: xr.Dataset,
+         end: TConcatEndTypes = "upper",
+         verbose: bool = False,
+     ) -> xr.Dataset:
          """Concatenates fields in dataset twice along the longitude dimension.

          Parameters
@@ -466,58 +506,12 @@ class Dataset:
          ds_concatenated : xr.Dataset
              The concatenated dataset.
          """
-         if verbose:
-             start_time = time.time()
-
-         ds_concatenated = xr.Dataset()
-
-         lon = ds[self.dim_names["longitude"]]
-         if end == "lower":
-             lon_minus360 = lon - 360
-             lon_concatenated = xr.concat(
-                 [lon_minus360, lon], dim=self.dim_names["longitude"]
-             )
-
-         elif end == "upper":
-             lon_plus360 = lon + 360
-             lon_concatenated = xr.concat(
-                 [lon, lon_plus360], dim=self.dim_names["longitude"]
-             )
-
-         elif end == "both":
-             lon_minus360 = lon - 360
-             lon_plus360 = lon + 360
-             lon_concatenated = xr.concat(
-                 [lon_minus360, lon, lon_plus360], dim=self.dim_names["longitude"]
-             )
-
-         for var in ds.data_vars:
-             if self.dim_names["longitude"] in ds[var].dims:
-                 field = ds[var]
-
-                 if end == "both":
-                     field_concatenated = xr.concat(
-                         [field, field, field], dim=self.dim_names["longitude"]
-                     )
-                 else:
-                     field_concatenated = xr.concat(
-                         [field, field], dim=self.dim_names["longitude"]
-                     )
-
-                 if self.use_dask:
-                     field_concatenated = field_concatenated.chunk(
-                         {self.dim_names["longitude"]: -1}
-                     )
-                 field_concatenated[self.dim_names["longitude"]] = lon_concatenated
-                 ds_concatenated[var] = field_concatenated
-             else:
-                 ds_concatenated[var] = ds[var]
-
-         ds_concatenated[self.dim_names["longitude"]] = lon_concatenated
-
-         if verbose:
-             logging.info(
-                 f"Concatenating the data along the longitude dimension: {time.time() - start_time:.3f} seconds"
+         with Timed(
+             "=== Concatenating the data along the longitude dimension ===",
+             verbose=verbose,
+         ):
+             ds_concatenated = _concatenate_longitudes(
+                 ds, self.dim_names, end, self.use_dask
              )

          return ds_concatenated
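The inline implementation above moved into a module-level `_concatenate_longitudes` helper, with timing handled by the new `Timed` context manager. The wrap-around trick itself is unchanged; a standalone sketch of what it does:

```python
import numpy as np
import xarray as xr

lon = xr.DataArray(np.arange(0.0, 360.0, 1.0), dims="lon")
field = xr.DataArray(np.cos(np.deg2rad(lon.values)), dims="lon", coords={"lon": lon})

# end="both": copies of the field at lon - 360 and lon + 360 flank the original,
# so any window (e.g. one straddling the dateline) can be sliced contiguously.
wrapped_lon = xr.concat([lon - 360, lon, lon + 360], dim="lon")
wrapped = xr.concat([field, field, field], dim="lon")
wrapped["lon"] = wrapped_lon

print(wrapped.sel(lon=slice(-30.0, 30.0)).sizes)  # contiguous slice across 0 degrees
```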
@@ -552,14 +546,16 @@ class Dataset:
          ds = self.ds.astype({var: "float64" for var in self.ds.data_vars})
          self.ds = ds

+         return None
+
      def choose_subdomain(
          self,
-         target_coords,
-         buffer_points=20,
-         return_copy=False,
-         return_coords_only=False,
-         verbose=False,
-     ):
+         target_coords: dict[str, Any],
+         buffer_points: int = DEFAULT_NR_BUFFER_POINTS,
+         return_copy: bool = False,
+         return_coords_only: bool = False,
+         verbose: bool = False,
+     ) -> xr.Dataset | LatLonDataset | None:
          """Selects a subdomain from the xarray Dataset based on specified target
          coordinates, extending the selection by a defined buffer. Adjusts longitude
          ranges as necessary to accommodate the dataset's expected range and handles
@@ -596,95 +592,16 @@ class Dataset:
          ValueError
              If the selected latitude or longitude range does not intersect with the dataset.
          """
-         lat_min = target_coords["lat"].min().values
-         lat_max = target_coords["lat"].max().values
-         lon_min = target_coords["lon"].min().values
-         lon_max = target_coords["lon"].max().values
-
-         margin = self.resolution * buffer_points
-
-         # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is necessary)
-         subdomain = self.ds.sel(
-             **{
-                 self.dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
-             }
-         )
-         lon = subdomain[self.dim_names["longitude"]]
-
-         if self.is_global:
-             concats = []
-             # Concatenate only if necessary
-             if lon_max + margin > lon.max():
-                 # See if shifting by +360 degrees helps
-                 if (lon_min - margin > (lon + 360).min()) and (
-                     lon_max + margin < (lon + 360).max()
-                 ):
-                     subdomain[self.dim_names["longitude"]] = lon + 360
-                     lon = subdomain[self.dim_names["longitude"]]
-                 else:
-                     concats.append("upper")
-             if lon_min - margin < lon.min():
-                 # See if shifting by -360 degrees helps
-                 if (lon_min - margin > (lon - 360).min()) and (
-                     lon_max + margin < (lon - 360).max()
-                 ):
-                     subdomain[self.dim_names["longitude"]] = lon - 360
-                     lon = subdomain[self.dim_names["longitude"]]
-                 else:
-                     concats.append("lower")
-
-             if concats:
-                 end = "both" if len(concats) == 2 else concats[0]
-                 subdomain = self.concatenate_longitudes(
-                     subdomain, end=end, verbose=False
-                 )
-                 lon = subdomain[self.dim_names["longitude"]]
-
-         else:
-             # Adjust longitude range if needed to match the expected range
-             if not target_coords["straddle"]:
-                 if lon.min() < -180:
-                     if lon_max + margin > 0:
-                         lon_min -= 360
-                         lon_max -= 360
-                 elif lon.min() < 0:
-                     if lon_max + margin > 180:
-                         lon_min -= 360
-                         lon_max -= 360
-
-             if target_coords["straddle"]:
-                 if lon.max() > 360:
-                     if lon_min - margin < 180:
-                         lon_min += 360
-                         lon_max += 360
-                 elif lon.max() > 180:
-                     if lon_min - margin < 0:
-                         lon_min += 360
-                         lon_max += 360
-         # Select the subdomain in longitude direction
-
-         subdomain = subdomain.sel(
-             **{
-                 self.dim_names["longitude"]: slice(lon_min - margin, lon_max + margin),
-             }
+         subdomain = choose_subdomain(
+             ds=self.ds,
+             dim_names=self.dim_names,
+             resolution=self.resolution,
+             is_global=self.is_global,
+             target_coords=target_coords,
+             buffer_points=buffer_points,
+             use_dask=self.use_dask,
          )

-         # Check if the selected subdomain has zero dimensions in latitude or longitude
-         if subdomain[self.dim_names["latitude"]].size == 0:
-             raise ValueError("Selected latitude range does not intersect with dataset.")
-
-         if subdomain[self.dim_names["longitude"]].size == 0:
-             raise ValueError(
-                 "Selected longitude range does not intersect with dataset."
-             )
-
-         # Adjust longitudes to expected range if needed
-         lon = subdomain[self.dim_names["longitude"]]
-         if target_coords["straddle"]:
-             subdomain[self.dim_names["longitude"]] = xr.where(lon > 180, lon - 360, lon)
-         else:
-             subdomain[self.dim_names["longitude"]] = xr.where(lon < 0, lon + 360, lon)
-
          if return_coords_only:
              # Create and return a dataset with only latitudes and longitudes
              coords_ds = subdomain[
@@ -693,9 +610,10 @@ class Dataset:
              return coords_ds

          if return_copy:
-             return Dataset.from_ds(self, subdomain)
+             return LatLonDataset.from_ds(self, subdomain)
          else:
              self.ds = subdomain
+             return None

      def apply_lateral_fill(self):
          """Apply lateral fill to variables using the dataset's mask and grid dimensions.
@@ -715,10 +633,6 @@ class Dataset:
          point to the same variable in the dataset.
          """
          if self.needs_lateral_fill:
-             logging.info(
-                 "Applying 2D horizontal fill to the source data before regridding."
-             )
-
              lateral_fill = LateralFill(
                  self.ds["mask"],
                  [self.dim_names["latitude"], self.dim_names["longitude"]],
@@ -749,10 +663,6 @@ class Dataset:
              else:
                  # Apply standard lateral fill for other variables
                  self.ds[var_name] = lateral_fill.apply(self.ds[var_name])
-         else:
-             logging.info(
-                 "2D horizontal fill is skipped because source data already contains filled values."
-             )

      def extrapolate_deepest_to_bottom(self):
          """Extrapolate deepest non-NaN values to fill bottom NaNs along the depth
@@ -769,8 +679,8 @@ class Dataset:
          )

      @classmethod
-     def from_ds(cls, original_dataset: "Dataset", ds: xr.Dataset) -> "Dataset":
-         """Substitute the internal dataset of a Dataset object with a new xarray
+     def from_ds(cls, original_dataset: LatLonDataset, ds: xr.Dataset) -> LatLonDataset:
+         """Substitute the internal dataset of a LatLonDataset object with a new xarray
          Dataset.

          This method creates a new Dataset instance, bypassing the usual `__init__`
@@ -780,18 +690,18 @@ class Dataset:

          Parameters
          ----------
-         original_dataset : Dataset
-             The original Dataset instance from which attributes will be copied.
+         original_dataset : LatLonDataset
+             The original LatLonDataset instance from which attributes will be copied.
          ds : xarray.Dataset
              The new xarray Dataset to assign to the `ds` attribute of the new instance.

          Returns
          -------
-         Dataset
+         LatLonDataset
              A new Dataset instance with the `ds` attribute set to the provided dataset
              and other attributes copied from the original instance.
          """
-         # Create a new Dataset instance without calling __init__ or __post_init__
+         # Create a new LatLonDataset instance without calling __init__ or __post_init__
          dataset = cls.__new__(cls)

          # Directly set the provided dataset as the 'ds' attribute
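A short usage sketch for `from_ds` — given an existing instance `original` (assumed to have been constructed normally), it clones the wrapper around a derived `xr.Dataset` without re-running `__init__`:

```python
# Keeps dim_names, var_names, resolution, etc. from the original instance;
# `original` is a hypothetical, already-initialized LatLonDataset.
first_step = original.ds.isel(time=slice(0, 1))   # illustrative subset
clone = LatLonDataset.from_ds(original, first_step)
```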
@@ -806,7 +716,7 @@ class Dataset:


  @dataclass(kw_only=True)
- class TPXODataset(Dataset):
+ class TPXODataset(LatLonDataset):
      """Represents tidal data on the original grid from the TPXO dataset.

      Parameters
@@ -871,7 +781,7 @@ class TPXODataset(Dataset):
          ValueError
              If longitude or latitude values do not match the grid.
          """
-         ds_grid = _load_data(self.grid_filename, self.dim_names, self.use_dask)
+         ds_grid = load_data(self.grid_filename, self.dim_names, self.use_dask)

          # Define mask and coordinate names based on location
          if self.location == "h":
@@ -902,21 +812,13 @@ class TPXODataset(Dataset):

          # Drop all dimensions except 'longitude' and 'latitude'
          dims_to_keep = {"longitude", "latitude"}
-         dims_to_drop = [dim for dim in ds_grid.dims if dim not in dims_to_keep]
+         dims_to_drop: set[str] = set(ds_grid.dims) - dims_to_keep
          if dims_to_drop:
              ds_grid = ds_grid.isel({dim: 0 for dim in dims_to_drop})

          # Ensure correct dimension order
          ds_grid = ds_grid.transpose("latitude", "longitude")

-         dims_to_keep = {"longitude", "latitude"}
-         dims_to_drop = set(ds_grid.dims) - dims_to_keep
-         ds_grid = (
-             ds_grid.isel({dim: 0 for dim in dims_to_drop}) if dims_to_drop else ds_grid
-         )
-         # Bring dimensions in correct order
-         ds_grid = ds_grid.transpose("latitude", "longitude")
-
          ds = ds.rename({"con": "nc"})
          ds = ds.assign_coords(
              {
@@ -1029,7 +931,7 @@ class TPXODataset(Dataset):


  @dataclass(kw_only=True)
- class GLORYSDataset(Dataset):
+ class GLORYSDataset(LatLonDataset):
      """Represents GLORYS data on original grid."""

      var_names: dict[str, str] = field(
@@ -1051,7 +953,7 @@ class GLORYSDataset(Dataset):
          }
      )

-     climatology: bool | None = False
+     climatology: bool = False

      def post_process(self):
          """Apply a mask to the dataset based on the 'zeta' variable, with 0 where 'zeta'
@@ -1067,19 +969,29 @@ class GLORYSDataset(Dataset):
          None
              The dataset is modified in-place by applying the mask to each variable.
          """
-         mask = xr.where(
-             self.ds[self.var_names["zeta"]].isel({self.dim_names["time"]: 0}).isnull(),
-             0,
-             1,
-         )
-         mask_vel = xr.where(
-             self.ds[self.var_names["u"]]
-             .isel({self.dim_names["time"]: 0, self.dim_names["depth"]: 0})
-             .isnull(),
-             0,
-             1,
-         )
+         zeta = self.ds[self.var_names["zeta"]]
+         u = self.ds[self.var_names["u"]]
+
+         # Select time=0 if time dimension exists, otherwise use data as-is
+         if self.dim_names["time"] in zeta.dims:
+             zeta_ref = zeta.isel({self.dim_names["time"]: 0})
+         else:
+             zeta_ref = zeta
+
+         if self.dim_names["time"] in u.dims:
+             u_ref = u.isel({self.dim_names["time"]: 0})
+         else:
+             u_ref = u

+         # Also handle depth for velocity
+         if self.dim_names["depth"] in u_ref.dims:
+             u_ref = u_ref.isel({self.dim_names["depth"]: 0})
+
+         # Create masks
+         mask = xr.where(zeta_ref.isnull(), 0, 1)
+         mask_vel = xr.where(u_ref.isnull(), 0, 1)
+
+         # Save to dataset
          self.ds["mask"] = mask
          self.ds["mask_vel"] = mask_vel

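The rewritten `post_process` builds the same NaN-based land masks but no longer assumes that time (or depth) dimensions are present. The core `xr.where` step, checked on a toy field without a time dimension:

```python
import numpy as np
import xarray as xr

zeta = xr.DataArray([[0.1, np.nan], [0.2, 0.3]], dims=("latitude", "longitude"))
mask = xr.where(zeta.isnull(), 0, 1)  # 0 over land (NaN), 1 over ocean
print(mask.values)                    # [[1 0]
                                      #  [1 1]]
```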
@@ -1130,7 +1042,7 @@ class GLORYSDefaultDataset(GLORYSDataset):

          spec = importlib.util.find_spec(package_name)
          if not spec:
-             msg = _get_pkg_error_msg("cloud-based GLORYS data", package_name, "stream")
+             msg = get_pkg_error_msg("cloud-based GLORYS data", package_name, "stream")
              raise RuntimeError(msg)

          try:
@@ -1151,18 +1063,40 @@ class GLORYSDefaultDataset(GLORYSDataset):
              The streaming dataset
          """
          copernicusmarine = self._load_copernicus()
-         return copernicusmarine.open_dataset(
+
+         # ds = copernicusmarine.download_functions.download_zarr.open_dataset_from_arco_series(
+         #     dataset_url="https://s3.waw3-1.cloudferro.com/mdl-arco-geo-025/arco/GLOBAL_MULTIYEAR_PHY_001_030/cmems_mod_glo_phy_my_0.083deg_P1D-m_202311/geoChunked.zarr",
+         #     variables=["thetao", "so", "uo", "vo", "zos"],
+         #     geographical_parameters=copernicusmarine.download_functions.subset_parameters.GeographicalParameters(),
+         #     temporal_parameters=copernicusmarine.download_functions.subset_parameters.TemporalParameters(
+         #         start_datetime=self.start_time, end_datetime=self.end_time
+         #     ),
+         #     depth_parameters=copernicusmarine.download_functions.subset_parameters.DepthParameters(),
+         #     coordinates_selection_method="outside",
+         #     optimum_dask_chunking={
+         #         "time": 1,
+         #         "depth": -1,
+         #         "latitude": -1,
+         #         "longitude": -1,
+         #     },
+         # )
+
+         ds = copernicusmarine.open_dataset(
              self.dataset_name,
              start_datetime=self.start_time,
              end_datetime=self.end_time,
              service="arco-geo-series",
-             coordinates_selection_method="inside",
-             chunk_size_limit=2,
+             coordinates_selection_method="outside",
+             chunk_size_limit=-1,
          )
+         chunks = get_dask_chunks(self.dim_names)
+         ds = ds.chunk(chunks)
+
+         return ds


  @dataclass(kw_only=True)
- class UnifiedDataset(Dataset):
+ class UnifiedDataset(LatLonDataset):
      """Represents unified BGC data on original grid.

      Notes
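Together with `coordinates_selection_method="outside"` and the disabled `chunk_size_limit`, the streaming hunk above now rechunks explicitly after opening. Judging by the commented-out block it replaces, `get_dask_chunks` is expected to yield one time step per chunk with full spatial slabs (its exact return value is assumed here):

```python
# Equivalent manual rechunk: -1 keeps a dimension in a single chunk.
chunks = {"time": 1, "depth": -1, "latitude": -1, "longitude": -1}
ds = ds.chunk(chunks)
```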
@@ -1285,7 +1219,7 @@ class UnifiedBGCDataset(UnifiedDataset):
          }
      )

-     climatology: bool | None = True
+     climatology: bool = True


  @dataclass(kw_only=True)
@@ -1307,11 +1241,11 @@ class UnifiedBGCSurfaceDataset(UnifiedDataset):
          }
      )

-     climatology: bool | None = True
+     climatology: bool = True


  @dataclass(kw_only=True)
- class CESMDataset(Dataset):
+ class CESMDataset(LatLonDataset):
      """Represents CESM data on original grid."""

      # overwrite clean_up method from parent class
@@ -1422,9 +1356,9 @@ class CESMBGCDataset(CESMDataset):
          }
      )

-     climatology: bool | None = False
+     climatology: bool = False

-     def post_process(self):
+     def post_process(self) -> None:
          """
          Processes and converts CESM data values as follows:
          - Convert depth values from cm to m.
@@ -1493,9 +1427,9 @@ class CESMBGCSurfaceForcingDataset(CESMDataset):
          }
      )

-     climatology: bool | None = False
+     climatology: bool = False

-     def post_process(self):
+     def post_process(self) -> None:
          """Perform post-processing on the dataset to remove specific variables.

          This method checks if the variable "z_t" exists in the dataset. If it does,
@@ -1518,7 +1452,7 @@ class CESMBGCSurfaceForcingDataset(CESMDataset):


  @dataclass(kw_only=True)
- class ERA5Dataset(Dataset):
+ class ERA5Dataset(LatLonDataset):
      """Represents ERA5 data on original grid."""

      var_names: dict[str, str] = field(
@@ -1542,9 +1476,9 @@ class ERA5Dataset(Dataset):
          }
      )

-     climatology: bool | None = False
+     climatology: bool = False

-     def post_process(self):
+     def post_process(self) -> None:
          """
          Processes and converts ERA5 data values as follows:
          - Convert radiation values from J/m^2 to W/m^2.
@@ -1632,17 +1566,17 @@ class ERA5ARCODataset(ERA5Dataset):
          }
      )

-     def __post_init__(self):
+     def __post_init__(self) -> None:
          self.read_zarr = True
-         if not _has_gcsfs():
-             msg = _get_pkg_error_msg("cloud-based ERA5 data", "gcsfs", "stream")
+         if not has_gcsfs():
+             msg = get_pkg_error_msg("cloud-based ERA5 data", "gcsfs", "stream")
              raise RuntimeError(msg)

          super().__post_init__()


  @dataclass(kw_only=True)
- class ERA5Correction(Dataset):
+ class ERA5Correction(LatLonDataset):
      """Global dataset to correct ERA5 radiation.

      The dataset contains multiplicative correction factors for the ERA5 shortwave
@@ -1664,9 +1598,9 @@ class ERA5Correction(Dataset):
              "time": "time",
          }
      )
-     climatology: bool | None = True
+     climatology: bool = True

-     def __post_init__(self):
+     def __post_init__(self) -> None:
          if not self.climatology:
              raise NotImplementedError(
                  "Correction data must be a climatology. Set climatology to True."
@@ -1674,32 +1608,31 @@ class ERA5Correction(Dataset):
              )
          super().__post_init__()

-     def choose_subdomain(self, target_coords, straddle: bool):
-         """Converts longitude values in the dataset if necessary and selects a subdomain
-         based on the specified coordinates.
+     def match_subdomain(self, target_coords: dict[str, Any]) -> None:
+         """
+         Selects a subdomain from the dataset matching the specified coordinates.

-         This method converts longitude values between different ranges if required and then extracts a subset of the
-         dataset according to the given coordinates. It updates the dataset in place to reflect the selected subdomain.
+         This method extracts a subset of the dataset (`self.ds`) based on given latitude
+         and longitude values. If the dataset spans the globe, it concatenates longitudes
+         to ensure seamless wrapping.

          Parameters
          ----------
-         target_coords : dict
-             A dictionary specifying the target coordinates for selecting the subdomain. Keys should correspond to the
-             dimension names of the dataset (e.g., latitude and longitude), and values should be the desired ranges or
-             specific coordinate values.
-         straddle : bool
-             If True, assumes that target longitudes are in the range [-180, 180]. If False, assumes longitudes are in the
-             range [0, 360]. This parameter determines how longitude values are converted if necessary.
+         target_coords : dict[str, Any]
+             A dictionary containing the target latitude and longitude values to select.
+             Expected keys: "lat" and "lon", each mapped to a DataArray of coordinates.

          Raises
          ------
          ValueError
-             If the specified subdomain does not fully contain the specified latitude or longitude values. This can occur
-             if the dataset does not cover the full range of provided coordinates.
+             If the selected subdomain does not contain all specified latitude or
+             longitude values.

          Notes
          -----
-         - The dataset (`self.ds`) is updated in place to reflect the chosen subdomain.
+         - The dataset (`self.ds`) is updated in place.
+         - Assumes latitude values in `target_coords["lat"]` are within dataset bounds.
+         - For global datasets, longitude concatenation is applied unconditionally.
          """
          # Select the subdomain in latitude direction (so that we have to concatenate fewer latitudes below if concatenation is performed)
          subdomain = self.ds.sel({self.dim_names["latitude"]: target_coords["lat"]})
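Unlike `choose_subdomain`, which slices a range plus buffer, `match_subdomain` selects at exactly the target grid's coordinates. A self-contained sketch of that selection pattern (variable and dimension names are illustrative; `eta_rho` is a typical ROMS dimension):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"swr_corr": ("latitude", np.ones(181))},  # hypothetical correction factor
    coords={"latitude": np.linspace(-90.0, 90.0, 181)},
)
target_lat = xr.DataArray(np.array([10.0, 11.0, 12.0]), dims="eta_rho")

# Exact-coordinate selection: every target latitude must exist in the dataset,
# unlike the range-plus-buffer slicing done by choose_subdomain.
subdomain = ds.sel(latitude=target_lat)
print(subdomain.sizes)  # {'eta_rho': 3}
```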
@@ -1726,7 +1659,7 @@ class ERA5Correction(Dataset):


  @dataclass(kw_only=True)
- class ETOPO5Dataset(Dataset):
+ class ETOPO5Dataset(LatLonDataset):
      """Represents topography data on the original grid from the ETOPO5 dataset."""

      filename: str = field(default_factory=lambda: download_topo("etopo5.nc"))
@@ -1762,7 +1695,7 @@ class ETOPO5Dataset(Dataset):


  @dataclass(kw_only=True)
- class SRTM15Dataset(Dataset):
+ class SRTM15Dataset(LatLonDataset):
      """Represents topography data on the original grid from the SRTM15 dataset."""

      var_names: dict[str, str] = field(
@@ -1775,428 +1708,6 @@ class SRTM15Dataset(Dataset):
1775
1708
  )
1776
1709
 
1777
1710
 
1778
- # river datasets
1779
- @dataclass(kw_only=True)
1780
- class RiverDataset:
1781
- """Represents river data.
1782
-
1783
- Parameters
1784
- ----------
1785
- filename : Union[str, Path, List[Union[str, Path]]]
1786
- The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
1787
- or a list of strings or Path objects containing multiple files.
1788
- start_time : datetime
1789
- The start time for selecting relevant data.
1790
- end_time : datetime
1791
- The end time for selecting relevant data.
1792
- dim_names: Dict[str, str]
1793
- Dictionary specifying the names of dimensions in the dataset.
1794
- Requires "station" and "time" as keys.
1795
- var_names: Dict[str, str]
1796
- Dictionary of variable names that are required in the dataset.
1797
- Requires the keys "latitude", "longitude", "flux", "ratio", and "name".
1798
- opt_var_names: Dict[str, str], optional
1799
- Dictionary of variable names that are optional in the dataset.
1800
- Defaults to an empty dictionary.
1801
- climatology : bool
1802
- Indicates whether the dataset is climatological. Defaults to False.
1803
-
1804
- Attributes
1805
- ----------
1806
- ds : xr.Dataset
1807
- The xarray Dataset containing the forcing data on its original grid.
1808
- """
1809
-
1810
- filename: str | Path | list[str | Path]
1811
- start_time: datetime
1812
- end_time: datetime
1813
- dim_names: dict[str, str]
1814
- var_names: dict[str, str]
1815
- opt_var_names: dict[str, str] | None = field(default_factory=dict)
1816
- climatology: bool | None = False
1817
- ds: xr.Dataset = field(init=False, repr=False)
1818
-
1819
- def __post_init__(self):
1820
- # Validate start_time and end_time
1821
- if not isinstance(self.start_time, datetime):
1822
- raise TypeError(
1823
- f"start_time must be a datetime object, but got {type(self.start_time).__name__}."
1824
- )
1825
- if not isinstance(self.end_time, datetime):
1826
- raise TypeError(
1827
- f"end_time must be a datetime object, but got {type(self.end_time).__name__}."
1828
- )
1829
-
1830
- ds = self.load_data()
1831
- ds = self.clean_up(ds)
1832
- self.check_dataset(ds)
1833
- ds = _deduplicate_river_names(
1834
- ds, self.var_names["name"], self.dim_names["station"]
1835
- )
1836
-
1837
- # Select relevant times
1838
- ds = self.add_time_info(ds)
1839
- self.ds = ds
1840
-
1841
- def load_data(self) -> xr.Dataset:
1842
- """Load dataset from the specified file.
1843
-
1844
- Returns
1845
- -------
1846
- ds : xr.Dataset
1847
- The loaded xarray Dataset containing the forcing data.
1848
- """
1849
- ds = _load_data(
1850
- self.filename, self.dim_names, use_dask=False, decode_times=False
1851
- )
1852
-
1853
- return ds
1854
-
1855
- def clean_up(self, ds: xr.Dataset) -> xr.Dataset:
1856
- """Decodes the 'name' variable (if byte-encoded) and updates the dataset.
1857
-
1858
- This method checks if the 'name' variable is of dtype 'object' (i.e., byte-encoded),
1859
- and if so, decodes each byte array to a string and updates the dataset.
1860
- It also ensures that the 'station' dimension is of integer type.
1861
-
1862
-
1863
- Parameters
1864
- ----------
1865
- ds : xr.Dataset
1866
- The dataset containing the 'name' variable to decode.
1867
-
1868
- Returns
1869
- -------
1870
- ds : xr.Dataset
1871
- The dataset with the decoded 'name' variable.
1872
- """
1873
- if ds[self.var_names["name"]].dtype == "object":
1874
- names = []
1875
- for i in range(len(ds[self.dim_names["station"]])):
1876
- byte_array = ds[self.var_names["name"]].isel(
1877
- **{self.dim_names["station"]: i}
1878
- )
1879
- name = decode_string(byte_array)
1880
- names.append(name)
1881
- ds[self.var_names["name"]] = xr.DataArray(
1882
- data=names, dims=self.dim_names["station"]
1883
- )
1884
-
1885
- if ds[self.dim_names["station"]].dtype == "float64":
1886
- ds[self.dim_names["station"]] = ds[self.dim_names["station"]].astype(int)
1887
-
1888
- # Drop all variables that have chars dim
1889
- vars_to_drop = ["ocn_name", "stn_name", "ct_name", "cn_name", "chars"]
1890
- existing_vars = [var for var in vars_to_drop if var in ds]
1891
- ds = ds.drop_vars(existing_vars)
1892
-
1893
- return ds
1894
-
1895
- def check_dataset(self, ds: xr.Dataset) -> None:
1896
- """Validate required variables, dimensions, and uniqueness of river names.
1897
-
1898
- Parameters
1899
- ----------
1900
- ds : xr.Dataset
1901
- The xarray Dataset to check.
1902
-
1903
- Raises
1904
- ------
1905
- ValueError
1906
- If the dataset does not contain the specified variables or dimensions.
1907
- """
1908
- _check_dataset(ds, self.dim_names, self.var_names, self.opt_var_names)
1909
-
1910
- def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
1911
- """Dummy method to be overridden by child classes to add time information to the
1912
- dataset.
1913
-
1914
- This method is intended as a placeholder and should be implemented in subclasses
1915
- to provide specific functionality for adding time-related information to the dataset.
1916
-
1917
- Parameters
1918
- ----------
1919
- ds : xr.Dataset
1920
- The xarray Dataset to which time information will be added.
1921
-
1922
- Returns
1923
- -------
1924
- xr.Dataset
1925
- The xarray Dataset with time information added (as implemented by child classes).
1926
- """
1927
- return ds
1928
-
1929
- def select_relevant_times(self, ds) -> xr.Dataset:
1930
- """Select a subset of the dataset based on the specified time range.
1931
-
1932
- This method filters the dataset to include all records between `start_time` and `end_time`.
1933
- Additionally, it ensures that one record at or before `start_time` and one record at or
1934
- after `end_time` are included, even if they fall outside the strict time range.
1935
-
1936
- If no `end_time` is specified, the method will select the time range of
1937
- [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
1938
-
1939
- Parameters
1940
- ----------
1941
- ds : xr.Dataset
1942
- The input dataset to be filtered. Must contain a time dimension.
1943
-
1944
- Returns
1945
- -------
1946
- xr.Dataset
1947
- A dataset filtered to the specified time range, including the closest entries
1948
- at or before `start_time` and at or after `end_time` if applicable.
1949
-
1950
- Warns
1951
- -----
1952
- UserWarning
1953
- If no records at or before `start_time` or no records at or after `end_time` are found.
1954
-
1955
- UserWarning
1956
- If the dataset does not contain any time dimension or the time dimension is incorrectly named.
1957
- """
1958
- time_dim = self.dim_names["time"]
1959
-
1960
- ds = _select_relevant_times(ds, time_dim, self.start_time, self.end_time, False)
1961
-
1962
- return ds
1963
-
1964
-    def compute_climatology(self):
-        logging.info("Compute climatology for river forcing.")
-
-        time_dim = self.dim_names["time"]
-
-        flux = self.ds[self.var_names["flux"]].groupby(f"{time_dim}.month").mean()
-        self.ds[self.var_names["flux"]] = flux
-
-        ds = assign_dates_to_climatology(self.ds, "month")
-        ds = ds.swap_dims({"month": "time"})
-        self.ds = ds
-
-        updated_dim_names = {**self.dim_names}
-        updated_dim_names["time"] = "time"
-        self.dim_names = updated_dim_names
-
-        self.climatology = True
-
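For readers skimming the removed `RiverDataset` API: `compute_climatology` collapses the station time series into twelve monthly means before relabeling the `month` dimension as `time`. A minimal sketch of that reduction with plain xarray (toy data and variable names, not the package's fixtures):

```python
import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical daily river flux for two stations over three years.
time = pd.date_range("2000-01-01", "2002-12-31", freq="D")
flux = xr.DataArray(
    np.random.rand(time.size, 2),
    dims=("time", "station"),
    coords={"time": time},
    name="FLOW",
)

# Averaging within each calendar month yields a 12-entry climatology,
# which is what compute_climatology stores before swapping dimensions.
clim = flux.groupby("time.month").mean()
print(clim.sizes)  # month: 12, station: 2
```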
-    def sort_by_river_volume(self, ds: xr.Dataset) -> xr.Dataset:
-        """Sorts the dataset by river volume in descending order (largest rivers first),
-        if the volume variable is available.
-
-        This method uses the river volume to reorder the dataset such that the rivers with
-        the largest volumes come first in the `station` dimension. If the volume variable
-        is not present in the dataset, a warning is logged.
-
-        Parameters
-        ----------
-        ds : xr.Dataset
-            The xarray Dataset containing the river data to be sorted by volume.
-
-        Returns
-        -------
-        xr.Dataset
-            The dataset with rivers sorted by their volume in descending order.
-            If the volume variable is not available, the original dataset is returned.
-        """
-        if "vol" in self.opt_var_names:
-            volume_values = ds[self.opt_var_names["vol"]].values
-            if isinstance(volume_values, np.ndarray):
-                # Check if all volume values are the same
-                if np.all(volume_values == volume_values[0]):
-                    # If all volumes are the same, no need to reverse order
-                    sorted_indices = np.argsort(volume_values)  # Sort in ascending order
-                else:
-                    # If volumes differ, reverse order for descending sort
-                    sorted_indices = np.argsort(volume_values)[::-1]
-
-                ds = ds.isel(**{self.dim_names["station"]: sorted_indices})
-
-            else:
-                logging.warning("The volume data is not in a valid array format.")
-        else:
-            logging.warning(
-                "Cannot sort rivers by volume. 'vol' is missing in the variable names."
-            )
-
-        return ds
-
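`sort_by_river_volume` relies on reversing the `np.argsort` output to get a largest-first station ordering. A self-contained illustration (made-up volumes):

```python
import numpy as np
import xarray as xr

# Toy dataset: three rivers with different long-term volumes (units arbitrary).
ds = xr.Dataset(
    {"vol_stn": ("station", np.array([120.0, 540.0, 80.0]))},
    coords={"station": [0, 1, 2]},
)

# np.argsort sorts ascending; reversing the index array gives the
# descending (largest-first) order that sort_by_river_volume applies.
order = np.argsort(ds["vol_stn"].values)[::-1]
print(ds.isel(station=order)["vol_stn"].values)  # [540. 120.  80.]
```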
-    def extract_relevant_rivers(self, target_coords, dx):
-        """Extracts a subset of the dataset based on the proximity of river mouths to
-        target coordinates.
-
-        This method calculates the distance between each river mouth and the provided target coordinates
-        (latitude and longitude) using the `gc_dist` function. It then filters the dataset to include only those
-        river stations whose minimum distance from the target is less than a specified threshold distance (`dx`).
-
-        Parameters
-        ----------
-        target_coords : dict
-            A dictionary containing the target coordinates for the comparison. It should include:
-            - "lon" (float): The target longitude in degrees.
-            - "lat" (float): The target latitude in degrees.
-            - "straddle" (bool): A flag indicating whether to adjust the longitudes for stations that cross the
-              International Date Line. If `True`, longitudes greater than 180 degrees are adjusted by subtracting 360;
-              otherwise, negative longitudes are adjusted by adding 360.
-        dx : float
-            The maximum distance threshold (in meters) for including a river station. Only river mouths that are
-            within `dx` meters from the target coordinates will be included in the returned dataset.
-
-        Returns
-        -------
-        indices : dict[str, list[tuple]]
-            A dictionary containing the indices of the rivers that are within the threshold distance from
-            the target coordinates. River names are the keys; each value is a list of tuples, each giving
-            the (`eta_rho`, `xi_rho`) grid indices of the river.
-        """
-        # Retrieve longitude and latitude of river mouths
-        river_lon = self.ds[self.var_names["longitude"]]
-        river_lat = self.ds[self.var_names["latitude"]]
-
-        # Adjust longitude based on whether it crosses the International Date Line (straddle case)
-        if target_coords["straddle"]:
-            river_lon = xr.where(river_lon > 180, river_lon - 360, river_lon)
-        else:
-            river_lon = xr.where(river_lon < 0, river_lon + 360, river_lon)
-
-        # Calculate the distance between the target coordinates and each river mouth
-        dist = gc_dist(target_coords["lon"], target_coords["lat"], river_lon, river_lat)
-        dist_min = dist.min(dim=["eta_rho", "xi_rho"])
-        # Filter the dataset to include only stations within the distance threshold
-        if (dist_min < dx).any():
-            ds = self.ds.where(dist_min < dx, drop=True)
-            ds = self.sort_by_river_volume(ds)
-            dist = dist.where(dist_min < dx, drop=True).transpose(
-                self.dim_names["station"], "eta_rho", "xi_rho"
-            )
-
-            river_indices = get_indices_of_nearest_grid_cell_for_rivers(dist, self)
-        else:
-            ds = xr.Dataset()
-            river_indices = {}
-
-        self.ds = ds
-
-        return river_indices
-
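The proximity filter pairs a great-circle distance with an `xr.where(..., drop=True)` mask. The sketch below substitutes a plain haversine for the package's `gc_dist`, whose exact signature is not shown in this hunk:

```python
import numpy as np
import xarray as xr

# Illustrative stand-in for gc_dist: haversine distance in meters.
def haversine(lon1, lat1, lon2, lat2, radius=6371e3):
    lon1, lat1, lon2, lat2 = map(np.radians, (lon1, lat1, lon2, lat2))
    a = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    return 2 * radius * np.arcsin(np.sqrt(a))

# Two river mouths vs. a single target point; keep stations within dx.
river_lon = xr.DataArray([-70.0, 10.0], dims="station")
river_lat = xr.DataArray([43.0, 52.0], dims="station")
dist = haversine(-69.5, 43.2, river_lon, river_lat)
dx = 100e3  # 100 km threshold
print(dist.where(dist < dx, drop=True).sizes)  # {'station': 1}
```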
-    def extract_named_rivers(self, indices):
-        """Extracts a subset of the dataset based on the provided river names in the
-        indices dictionary.
-
-        This method filters the dataset to include only the rivers specified in the `indices` dictionary.
-        The resulting subset is stored in the `ds` attribute of the class.
-
-        Parameters
-        ----------
-        indices : dict
-            A dictionary where the keys are river names (strings) and the values are dictionaries
-            containing river-related data (e.g., river indices, coordinates).
-
-        Returns
-        -------
-        None
-            The method modifies the `self.ds` attribute in place, setting it to the filtered dataset
-            containing only the data related to the specified rivers.
-
-        Raises
-        ------
-        ValueError
-            - If `indices` is not a dictionary.
-            - If any of the requested river names are not found in the dataset.
-        """
-        if not isinstance(indices, dict):
-            raise ValueError("`indices` must be a dictionary.")
-
-        river_names = list(indices.keys())
-
-        # Ensure the dataset is filtered based on the provided river names
-        ds_filtered = self.ds.where(
-            self.ds[self.var_names["name"]].isin(river_names), drop=True
-        )
-
-        # Check that all requested rivers exist in the dataset
-        filtered_river_names = set(ds_filtered[self.var_names["name"]].values)
-        missing_rivers = set(river_names) - filtered_river_names
-
-        if missing_rivers:
-            raise ValueError(
-                f"The following rivers were not found in the dataset: {missing_rivers}"
-            )
-
-        # Set the filtered dataset as the new `ds`
-        self.ds = ds_filtered
-
-
-@dataclass(kw_only=True)
-class DaiRiverDataset(RiverDataset):
-    """Represents river data from the Dai river dataset."""
-
-    filename: str | Path | list[str | Path] = field(
-        default_factory=lambda: download_river_data("dai_trenberth_may2019.nc")
-    )
-    dim_names: dict[str, str] = field(
-        default_factory=lambda: {
-            "station": "station",
-            "time": "time",
-        }
-    )
-    var_names: dict[str, str] = field(
-        default_factory=lambda: {
-            "latitude": "lat_mou",
-            "longitude": "lon_mou",
-            "flux": "FLOW",
-            "ratio": "ratio_m2s",
-            "name": "riv_name",
-        }
-    )
-    opt_var_names: dict[str, str] = field(
-        default_factory=lambda: {
-            "vol": "vol_stn",
-        }
-    )
-    climatology: bool | None = False
-
-    def add_time_info(self, ds: xr.Dataset) -> xr.Dataset:
-        """Adds time information to the dataset based on the climatology flag and
-        dimension names.
-
-        This method processes the dataset to include time information according to the climatology
-        setting. If the dataset represents climatology data and the time dimension is labeled as
-        "month", it assigns dates to the dataset based on a monthly climatology. Additionally, it
-        handles dimension name updates if necessary.
-
-        Parameters
-        ----------
-        ds : xr.Dataset
-            The input dataset to which time information will be added.
-
-        Returns
-        -------
-        xr.Dataset
-            The dataset with time information added, including adjustments for climatology and
-            dimension names.
-        """
-        time_dim = self.dim_names["time"]
-
-        # Extract the 'time' variable as a numpy array
-        time_vals = ds[time_dim].values
-
-        # Handle rounding of the time values
-        year = np.round(time_vals * 1e-2).astype(int)
-        month = np.round((time_vals * 1e-2 - year) * 1e2).astype(int)
-
-        # Convert to datetime (assuming the day is always the 15th)
-        dates = [datetime(year=i, month=m, day=15) for i, m in zip(year, month)]
-
-        ds[time_dim] = dates
-
-        return ds
-
-
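`DaiRiverDataset.add_time_info` unpacks times stored as YYYYMM floats. A standalone check of that arithmetic (hypothetical raw values):

```python
import numpy as np
from datetime import datetime

# Dai & Trenberth-style packed times: YYYYMM encoded in a single float.
time_vals = np.array([190001.0, 190002.0, 190012.0])

year = np.round(time_vals * 1e-2).astype(int)
month = np.round((time_vals * 1e-2 - year) * 1e2).astype(int)

# Mid-month timestamps, matching the day=15 convention used above.
dates = [datetime(year=y, month=m, day=15) for y, m in zip(year, month)]
print(dates[0])  # 1900-01-15 00:00:00
```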
 @dataclass
 class TPXOManager:
     """Manages multiple TPXODataset instances and selects and processes tidal
@@ -2684,208 +2195,6 @@ class TPXOManager:
         object.__setattr__(self.datasets["sal"], "var_names", var_names)
 
 
-# shared functions
-
-
-def _check_dataset(
-    ds: xr.Dataset,
-    dim_names: dict[str, str],
-    var_names: dict[str, str],
-    opt_var_names: dict[str, str] | None = None,
-) -> None:
-    """Check if the dataset contains the specified variables and dimensions.
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        The xarray Dataset to check.
-    dim_names : Dict[str, str]
-        Dictionary specifying the names of dimensions in the dataset.
-    var_names : Dict[str, str]
-        Dictionary of variable names that are required in the dataset.
-    opt_var_names : Optional[Dict[str, str]], optional
-        Dictionary of optional variable names.
-        These variables are not strictly required, and the function will not raise an error if they are missing.
-        Default is None, meaning no optional variables are considered.
-
-    Raises
-    ------
-    ValueError
-        If the dataset does not contain the specified variables or dimensions.
-    """
-    missing_dims = [dim for dim in dim_names.values() if dim not in ds.dims]
-    if missing_dims:
-        raise ValueError(
-            f"Dataset does not contain all required dimensions. The following dimensions are missing: {missing_dims}"
-        )
-
-    missing_vars = [var for var in var_names.values() if var not in ds.data_vars]
-    if missing_vars:
-        raise ValueError(
-            f"Dataset does not contain all required variables. The following variables are missing: {missing_vars}"
-        )
-
-    if opt_var_names:
-        missing_optional_vars = [
-            var for var in opt_var_names.values() if var not in ds.data_vars
-        ]
-        if missing_optional_vars:
-            logging.warning(
-                f"Optional variables missing (but not critical): {missing_optional_vars}"
-            )
-
-
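`_check_dataset` reduces to two list comprehensions over `dims` and `data_vars`. A compact demonstration of the failure path (toy dataset, names mirroring the Dai conventions):

```python
import numpy as np
import xarray as xr

# Minimal dataset with a "station" dimension but no "FLOW" variable,
# mirroring how _check_dataset reports missing requirements.
ds = xr.Dataset({"lat_mou": ("station", np.zeros(3))})

required_vars = {"latitude": "lat_mou", "flux": "FLOW"}

missing = [v for v in required_vars.values() if v not in ds.data_vars]
if missing:
    # _check_dataset raises ValueError in this situation.
    print(f"missing required variables: {missing}")  # ['FLOW']
```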
-def _select_relevant_times(
-    ds, time_dim, start_time, end_time=None, climatology=False
-) -> xr.Dataset:
-    """Select a subset of the dataset based on the specified time range.
-
-    This method filters the dataset to include all records between `start_time` and `end_time`.
-    Additionally, it ensures that one record at or before `start_time` and one record at or
-    after `end_time` are included, even if they fall outside the strict time range.
-
-    If no `end_time` is specified, the method will select the time range of
-    [start_time, start_time + 24 hours] and return the closest time entry to `start_time` within that range.
-
-    Parameters
-    ----------
-    ds : xr.Dataset
-        The input dataset to be filtered. Must contain a time dimension.
-    time_dim : str
-        Name of the time dimension.
-    start_time : datetime
-        The start time for selecting relevant data.
-    end_time : Optional[datetime], optional
-        The end time for selecting relevant data. If not provided, only data around `start_time` is selected.
-    climatology : bool
-        Indicates whether the dataset is climatological. Defaults to False.
-
-    Returns
-    -------
-    xr.Dataset
-        A dataset filtered to the specified time range, including the closest entries
-        at or before `start_time` and at or after `end_time` if applicable.
-
-    Raises
-    ------
-    ValueError
-        If no matching times are found between `start_time` and `start_time + 24 hours`.
-
-    Warns
-    -----
-    UserWarning
-        If the dataset contains exactly 12 time steps but the climatology flag is not set.
-        This may indicate that the dataset represents climatology data.
-
-    UserWarning
-        If no records at or before `start_time` or no records at or after `end_time` are found.
-
-    UserWarning
-        If the dataset does not contain any time dimension or the time dimension is incorrectly named.
-
-    Notes
-    -----
-    - If the `climatology` flag is set and `end_time` is not provided, the method will
-      interpolate initial conditions from climatology data.
-    - If the dataset uses `cftime` datetime objects, these will be converted to standard
-      `np.datetime64` objects before filtering.
-    """
-    if time_dim in ds.variables:
-        if climatology:
-            if len(ds[time_dim]) != 12:
-                raise ValueError(
-                    f"The dataset contains {len(ds[time_dim])} time steps, but the climatology flag is set to True, which requires exactly 12 time steps."
-                )
-            if not end_time:
-                # Convert from timedelta64[ns] to fractional days
-                ds["time"] = ds["time"] / np.timedelta64(1, "D")
-                # Interpolate from climatology for initial conditions
-                ds = interpolate_from_climatology(ds, time_dim, start_time)
-        else:
-            time_type = get_time_type(ds[time_dim])
-            if time_type == "int":
-                raise ValueError(
-                    "The dataset contains integer time values, which are only supported when the climatology flag is set to True. However, your climatology flag is set to False."
-                )
-            if time_type == "cftime":
-                ds = ds.assign_coords(
-                    {time_dim: convert_cftime_to_datetime(ds[time_dim])}
-                )
-            if end_time:
-                # Identify records before or at start_time
-                before_start = ds[time_dim] <= np.datetime64(start_time)
-                if before_start.any():
-                    closest_before_start = (
-                        ds[time_dim].where(before_start, drop=True).max()
-                    )
-                else:
-                    logging.warning("No records found at or before the start_time.")
-                    closest_before_start = ds[time_dim].min()
-
-                # Identify records after or at end_time
-                after_end = ds[time_dim] >= np.datetime64(end_time)
-                if after_end.any():
-                    closest_after_end = ds[time_dim].where(after_end, drop=True).min()
-                else:
-                    logging.warning("No records found at or after the end_time.")
-                    closest_after_end = ds[time_dim].max()
-
-                # Select records within the time range and add the closest before/after
-                within_range = (ds[time_dim] > np.datetime64(start_time)) & (
-                    ds[time_dim] < np.datetime64(end_time)
-                )
-                selected_times = ds[time_dim].where(
-                    within_range
-                    | (ds[time_dim] == closest_before_start)
-                    | (ds[time_dim] == closest_after_end),
-                    drop=True,
-                )
-                ds = ds.sel({time_dim: selected_times})
-            else:
-                # Look in time range [start_time, start_time + 24h]
-                end_time = start_time + timedelta(days=1)
-                times = (np.datetime64(start_time) <= ds[time_dim]) & (
-                    ds[time_dim] < np.datetime64(end_time)
-                )
-                if np.all(~times):
-                    raise ValueError(
-                        f"The dataset does not contain any time entries between the specified start_time: {start_time} "
-                        f"and {start_time + timedelta(hours=24)}. "
-                        "Please ensure the dataset includes time entries for that range."
-                    )
-
-                ds = ds.where(times, drop=True)
-                if ds.sizes[time_dim] > 1:
-                    # Pick the first time entry within the 24-hour window
-                    ds = ds.isel({time_dim: 0})
-                logging.info(
-                    f"Selected time entry closest to the specified start_time ({start_time}) within the range [{start_time}, {start_time + timedelta(hours=24)}]: {ds[time_dim].values}"
-                )
-    else:
-        logging.warning(
-            "Dataset does not contain any time information. Please check if the time dimension "
-            "is correctly named or if the dataset includes time data."
-        )
-
-    return ds
-
-
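The bracketing behaviour of `_select_relevant_times` — keep everything inside the window plus the nearest record on each side — can be reproduced in a few lines (synthetic monthly data):

```python
import numpy as np
import pandas as pd
import xarray as xr

# Monthly records; request Feb 10 - Apr 20. The selection keeps everything
# strictly inside the window plus the closest bracketing records
# (Feb 1 and May 1), so downstream interpolation can cover the full range.
time = pd.date_range("2000-01-01", "2000-06-01", freq="MS")
ds = xr.Dataset({"flux": ("time", np.arange(time.size))}, coords={"time": time})

start = np.datetime64("2000-02-10")
end = np.datetime64("2000-04-20")

before = ds.time.where(ds.time <= start, drop=True).max()
after = ds.time.where(ds.time >= end, drop=True).min()
keep = ((ds.time > start) & (ds.time < end)) | (ds.time == before) | (ds.time == after)
print(ds.sel(time=ds.time.where(keep, drop=True)).time.values)
# ['2000-02-01' '2000-03-01' '2000-04-01' '2000-05-01']
```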
-def decode_string(byte_array):
-    # Decode each byte and handle errors with 'ignore'
-    decoded_string = "".join(
-        [
-            x.decode("utf-8", errors="ignore")  # Ignore invalid byte sequences
-            for x in byte_array.values
-            if isinstance(x, bytes) and x != b" " and x is not np.nan
-        ]
-    )
-
-    return decoded_string
-
-
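`decode_string` joins per-character bytes from fixed-width NetCDF char arrays. A quick standalone equivalent (made-up byte array):

```python
import numpy as np
import xarray as xr

# River names are often stored as fixed-width char arrays in NetCDF.
# A name then arrives as one byte per element, padded with blanks:
byte_array = xr.DataArray(np.array([b"A", b"m", b"a", b"z", b"o", b"n", b" ", b" "]))

name = "".join(
    x.decode("utf-8", errors="ignore")
    for x in byte_array.values
    if isinstance(x, bytes) and x != b" "
)
print(name)  # "Amazon"
```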
 def modified_julian_days(year, month, day, hour=0):
     """Calculate the Modified Julian Day (MJD) for a given date and time.
 
@@ -2943,77 +2252,273 @@ def modified_julian_days(year, month, day, hour=0):
     return mjd
 
 
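`modified_julian_days` (body collapsed in this hunk) follows the usual convention MJD = JD − 2400000.5. A sanity check against the MJD epoch, independent of the package's implementation:

```python
from datetime import datetime

# The MJD epoch is 1858-11-17 00:00 UTC, so 2000-01-01 should be MJD 51544.
epoch = datetime(1858, 11, 17)
date = datetime(2000, 1, 1)
delta = date - epoch
mjd = delta.days + delta.seconds / 86400
print(mjd)  # 51544.0
```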
-def get_indices_of_nearest_grid_cell_for_rivers(
-    dist: xr.DataArray, data: RiverDataset
-) -> dict[str, list[tuple[int, int]]]:
-    """Get the indices of the nearest grid cell for each river based on distance.
+def _concatenate_longitudes(
+    ds: xr.Dataset,
+    dim_names: Mapping[str, str],
+    end: TConcatEndTypes,
+    use_dask: bool = False,
+) -> xr.Dataset:
+    """
+    Concatenate the longitude dimension to handle global grids that cross
+    the 0/360-degree or -180/180-degree boundary.
+
+    Extends the longitude dimension on the lower side, the upper side, or both
+    by +/- 360 degrees and duplicates the corresponding variables along
+    that dimension.
 
     Parameters
     ----------
-    dist : xr.DataArray
-        A 2D or 3D array representing distances from each river to coastal grid cells,
-        with dimensions including "eta_rho" and "xi_rho".
-    data : RiverDataset
-        An instance of RiverDataset containing river names and dimension metadata.
+    ds : xr.Dataset
+        Input xarray Dataset to be concatenated.
+    dim_names : Mapping[str, str]
+        Dictionary or mapping containing dimension names. Must include "longitude".
+    end : str
+        Specifies which side(s) to extend:
+        - "lower": extend by subtracting 360 degrees.
+        - "upper": extend by adding 360 degrees.
+        - "both": extend on both sides.
+    use_dask : bool, default False
+        If True, chunk the concatenated longitude dimension using Dask.
 
     Returns
     -------
-    dict[str, list[tuple[int, int]]]
-        Dictionary mapping each river name to a list containing the (eta_rho, xi_rho) index
-        of the closest coastal grid cell.
+    xr.Dataset
+        Dataset with the longitude dimension extended and data variables duplicated.
+
+    Notes
+    -----
+    Only data variables containing the longitude dimension are concatenated;
+    others are left unchanged.
     """
-    # Find indices of the nearest coastal grid cell for each river
-    indices = dist.argmin(dim=["eta_rho", "xi_rho"])
-
-    eta_rho_values = indices["eta_rho"].values
-    xi_rho_values = indices["xi_rho"].values
-
-    # Get the corresponding station indices and river names
-    stations = indices["eta_rho"][data.dim_names["station"]].values
-    names = (
-        data.ds[data.var_names["name"]]
-        .sel({data.dim_names["station"]: stations})
-        .values
-    )
+    ds_concat = xr.Dataset()
+
+    lon_name = dim_names["longitude"]
+    lon = ds[lon_name]
+
+    match end:
+        case "lower":
+            lon_concat = xr.concat([lon - 360, lon], dim=lon_name)
+            n_copies = 2
+        case "upper":
+            lon_concat = xr.concat([lon, lon + 360], dim=lon_name)
+            n_copies = 2
+        case "both":
+            lon_concat = xr.concat([lon - 360, lon, lon + 360], dim=lon_name)
+            n_copies = 3
+        case _:
+            raise ValueError(f"Invalid `end` value: {end}")
+
+    for var in ds.variables:
+        if lon_name in ds[var].dims:
+            field = ds[var]
+            field_concat = xr.concat([field] * n_copies, dim=lon_name)
+
+            if use_dask:
+                field_concat = field_concat.chunk({lon_name: -1})
+
+            ds_concat[var] = field_concat
+        else:
+            ds_concat[var] = ds[var]
 
-    # Build dictionary of river name to grid index
-    river_indices = {
-        str(names[i]): [(int(eta_rho_values[i]), int(xi_rho_values[i]))]
-        for i in range(len(stations))
-    }
+    ds_concat = ds_concat.assign_coords({lon_name: lon_concat.values})
 
-    return river_indices
+    return ds_concat
 
 
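The new `_concatenate_longitudes` exists so a subsequent `slice` over longitude never has to wrap around the seam. The same idea in miniature (toy one-variable dataset, upper extension only):

```python
import numpy as np
import xarray as xr

# A toy "global" grid on [0, 360): extending the upper end lets a target
# domain that straddles the 0/360 seam be sliced contiguously.
lon = np.arange(0.0, 360.0, 1.0)
ds = xr.Dataset(
    {"sst": ("longitude", np.sin(np.radians(lon)))},
    coords={"longitude": lon},
)

extended = xr.concat(
    [ds, ds.assign_coords(longitude=ds.longitude + 360)], dim="longitude"
)
print(extended.longitude.values[[0, -1]])  # [  0. 719.]
# A slice like .sel(longitude=slice(350, 370)) now works without wrapping logic.
```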
-def _deduplicate_river_names(
-    ds: xr.Dataset, name_var: str, station_dim: str
+def choose_subdomain(
+    ds: xr.Dataset,
+    dim_names: Mapping[str, str],
+    resolution: float,
+    is_global: bool,
+    target_coords: Mapping[str, Any],
+    buffer_points: int = 20,
+    use_dask: bool = False,
 ) -> xr.Dataset:
-    """Ensure river names are unique by appending _1, _2 to duplicates, excluding non-
-    duplicates.
     """
-    original = ds[name_var]
+    Select a subdomain from an xarray Dataset based on target coordinates,
+    with optional buffer points and global longitude handling.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        The full xarray Dataset to subset.
+    dim_names : Mapping[str, str]
+        Dictionary mapping logical dimension names to dataset dimension names.
+        Example: {"latitude": "latitude", "longitude": "longitude"}.
+    resolution : float
+        Spatial resolution of the dataset, used to compute the buffer margin.
+    is_global : bool
+        Whether the dataset covers global longitude (affects concatenation logic).
+    target_coords : Mapping[str, Any]
+        Dictionary containing target latitude and longitude coordinates.
+        Expected keys: "lat", "lon", and "straddle" (boolean for crossing 180°).
+    buffer_points : int, default 20
+        Number of grid points to extend beyond the target coordinates.
+    use_dask : bool, optional
+        Indicates whether to use dask for chunking. If True, data is loaded with dask;
+        if False, data is processed eagerly. Defaults to False.
 
-    # Force cast to plain Python strings
-    names = [str(name) for name in original.values]
+    Returns
+    -------
+    xr.Dataset
+        Subset of the input Dataset covering the requested coordinates plus buffer.
 
-    # Count all names
-    name_counts = Counter(names)
-    seen = defaultdict(int)
+    Raises
+    ------
+    ValueError
+        If the selected latitude or longitude range does not intersect the dataset.
+    """
+    lat_min = target_coords["lat"].min().values
+    lat_max = target_coords["lat"].max().values
+    lon_min = target_coords["lon"].min().values
+    lon_max = target_coords["lon"].max().values
 
-    unique_names = []
-    for name in names:
-        if name_counts[name] > 1:
-            seen[name] += 1
-            unique_names.append(f"{name}_{seen[name]}")
-        else:
-            unique_names.append(name)
+    margin = resolution * buffer_points
+
+    # Select the subdomain in the latitude direction first, so that fewer
+    # latitudes have to be concatenated below if concatenation is necessary
+    subdomain = ds.sel(
+        **{
+            dim_names["latitude"]: slice(lat_min - margin, lat_max + margin),
+        }
+    )
+    lon = subdomain[dim_names["longitude"]]
+
+    if is_global:
+        concats = []
+        # Concatenate only if necessary
+        if lon_max + margin > lon.max():
+            # See if shifting by +360 degrees helps
+            if (lon_min - margin > (lon + 360).min()) and (
+                lon_max + margin < (lon + 360).max()
+            ):
+                subdomain[dim_names["longitude"]] = lon + 360
+                lon = subdomain[dim_names["longitude"]]
+            else:
+                concats.append("upper")
+        if lon_min - margin < lon.min():
+            # See if shifting by -360 degrees helps
+            if (lon_min - margin > (lon - 360).min()) and (
+                lon_max + margin < (lon - 360).max()
+            ):
+                subdomain[dim_names["longitude"]] = lon - 360
+                lon = subdomain[dim_names["longitude"]]
+            else:
+                concats.append("lower")
+
+        if concats:
+            end = "both" if len(concats) == 2 else concats[0]
+            end = cast(TConcatEndTypes, end)
+            subdomain = _concatenate_longitudes(
+                subdomain, dim_names=dim_names, end=end, use_dask=use_dask
+            )
+            lon = subdomain[dim_names["longitude"]]
+
+    else:
+        # Adjust the longitude range if needed to match the expected range
+        if not target_coords["straddle"]:
+            if lon.min() < -180:
+                if lon_max + margin > 0:
+                    lon_min -= 360
+                    lon_max -= 360
+            elif lon.min() < 0:
+                if lon_max + margin > 180:
+                    lon_min -= 360
+                    lon_max -= 360
+
+        if target_coords["straddle"]:
+            if lon.max() > 360:
+                if lon_min - margin < 180:
+                    lon_min += 360
+                    lon_max += 360
+            elif lon.max() > 180:
+                if lon_min - margin < 0:
+                    lon_min += 360
+                    lon_max += 360
+
+    # Select the subdomain in the longitude direction
+    subdomain = subdomain.sel(
+        **{
+            dim_names["longitude"]: slice(lon_min - margin, lon_max + margin),
+        }
+    )
+    # Check if the selected subdomain has zero size in latitude or longitude
+    if (
+        dim_names["latitude"] not in subdomain
+        or subdomain[dim_names["latitude"]].size == 0
+    ):
+        raise ValueError("Selected latitude range does not intersect with dataset.")
+
+    if (
+        dim_names["longitude"] not in subdomain
+        or subdomain[dim_names["longitude"]].size == 0
+    ):
+        raise ValueError("Selected longitude range does not intersect with dataset.")
+
+    # Adjust longitudes to the expected range if needed
+    lon = subdomain[dim_names["longitude"]]
+    if target_coords["straddle"]:
+        subdomain[dim_names["longitude"]] = xr.where(lon > 180, lon - 360, lon)
+    else:
+        subdomain[dim_names["longitude"]] = xr.where(lon < 0, lon + 360, lon)
 
-    # Replace with updated names while preserving dtype, dims, attrs
-    updated_array = xr.DataArray(
-        data=np.array(unique_names, dtype=f"<U{max(len(n) for n in unique_names)}"),
-        dims=original.dims,
-        attrs=original.attrs,
+    return subdomain
+
+
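`choose_subdomain` ultimately reduces to two buffered `.sel(slice(...))` calls once the longitude convention is settled. A simplified sketch with hypothetical target coordinates (no straddle handling):

```python
import numpy as np
import xarray as xr

# Carve out a buffered box around a small target domain from a
# 1-degree global grid (names mirror choose_subdomain's inputs).
lat = np.arange(-90.0, 90.0, 1.0)
lon = np.arange(0.0, 360.0, 1.0)
ds = xr.Dataset(coords={"latitude": lat, "longitude": lon})

target_lat = xr.DataArray(np.linspace(40, 45, 10))
target_lon = xr.DataArray(np.linspace(200, 210, 10))

margin = 1.0 * 20  # resolution * buffer_points
subset = ds.sel(
    latitude=slice(float(target_lat.min()) - margin, float(target_lat.max()) + margin),
    longitude=slice(float(target_lon.min()) - margin, float(target_lon.max()) + margin),
)
print(subset.sizes)  # {'latitude': 46, 'longitude': 51}
```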
+def get_glorys_bounds(
+    grid: Grid,
+    glorys_grid_path: Path | str | None = None,
+) -> dict[str, float]:
+    """
+    Compute the latitude/longitude bounds of a GLORYS spatial subset
+    that fully covers the given ROMS grid (with margin for regridding).
+
+    Parameters
+    ----------
+    grid : Grid
+        The grid object.
+    glorys_grid_path : str, optional
+        Path to the GLORYS global grid file. If None, defaults to
+        "<repo_root>/data/grids/GLORYS_global_grid.nc".
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary containing the bounding box values:
+
+        - `"minimum_latitude"` : float
+        - `"maximum_latitude"` : float
+        - `"minimum_longitude"` : float
+        - `"maximum_longitude"` : float
+
+    Notes
+    -----
+    - The resolution is estimated as the mean of latitude and longitude spacing.
+    """
+    if glorys_grid_path is None:
+        glorys_grid_path = GLORYS_GLOBAL_GRID_PATH
+
+    ds = xr.open_dataset(glorys_grid_path)
+
+    # Estimate grid resolution (mean spacing in degrees)
+    res_lat = ds.latitude.diff("latitude").mean()
+    res_lon = ds.longitude.diff("longitude").mean()
+    resolution = (res_lat + res_lon) / 2
+
+    # Extract target grid coordinates
+    target_coords = get_target_coords(grid)
+
+    # Select subdomain with margin
+    ds_subset = choose_subdomain(
+        ds=ds,
+        dim_names={"latitude": "latitude", "longitude": "longitude"},
+        resolution=resolution,
+        is_global=True,
+        target_coords=target_coords,
+        buffer_points=DEFAULT_NR_BUFFER_POINTS + 1,
     )
-    ds[name_var] = updated_array
 
-    return ds
+    # Compute bounds
+    return {
+        "minimum_latitude": float(ds_subset.latitude.min()),
+        "maximum_latitude": float(ds_subset.latitude.max()),
+        "minimum_longitude": float(ds_subset.longitude.min()),
+        "maximum_longitude": float(ds_subset.longitude.max()),
+    }
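
A plausible call site for the new `get_glorys_bounds`, e.g. to build a Copernicus Marine subsetting request. The import path and the `Grid` parameters are assumptions for illustration, not taken from this diff:

```python
from roms_tools import Grid
from roms_tools.setup.utils import get_glorys_bounds  # assumed import path

# Hypothetical 500 km x 500 km mid-Atlantic domain.
grid = Grid(
    nx=50, ny=50, size_x=500, size_y=500,
    center_lon=-40, center_lat=35, rot=0,
)
bounds = get_glorys_bounds(grid)
print(bounds["minimum_latitude"], bounds["maximum_latitude"])
```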