roms-tools 2.2.1__py3-none-any.whl → 2.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152)
  1. ci/environment.yml +1 -0
  2. roms_tools/__init__.py +2 -0
  3. roms_tools/analysis/roms_output.py +590 -0
  4. roms_tools/{setup/download.py → download.py} +3 -0
  5. roms_tools/{setup/plot.py → plot.py} +34 -28
  6. roms_tools/setup/boundary_forcing.py +199 -203
  7. roms_tools/setup/datasets.py +60 -136
  8. roms_tools/setup/grid.py +40 -67
  9. roms_tools/setup/initial_conditions.py +249 -247
  10. roms_tools/setup/nesting.py +6 -27
  11. roms_tools/setup/river_forcing.py +41 -76
  12. roms_tools/setup/surface_forcing.py +125 -75
  13. roms_tools/setup/tides.py +31 -51
  14. roms_tools/setup/topography.py +1 -1
  15. roms_tools/setup/utils.py +44 -224
  16. roms_tools/tests/test_analysis/test_roms_output.py +269 -0
  17. roms_tools/tests/{test_setup/test_regrid.py → test_regrid.py} +1 -1
  18. roms_tools/tests/test_setup/test_boundary_forcing.py +221 -58
  19. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/.zattrs +5 -3
  20. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/.zmetadata +156 -121
  21. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/abs_time/.zarray +2 -2
  22. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/abs_time/.zattrs +2 -1
  23. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/abs_time/0 +0 -0
  24. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/bry_time/.zarray +2 -2
  25. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/bry_time/.zattrs +1 -1
  26. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/bry_time/0 +0 -0
  27. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_east/.zarray +4 -4
  28. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_east/0.0.0 +0 -0
  29. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_north/.zarray +4 -4
  30. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_north/0.0.0 +0 -0
  31. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_south/.zarray +4 -4
  32. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_south/0.0.0 +0 -0
  33. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_west/.zarray +4 -4
  34. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/salt_west/0.0.0 +0 -0
  35. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_east/.zarray +4 -4
  36. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_east/0.0.0 +0 -0
  37. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_north/.zarray +4 -4
  38. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_north/0.0.0 +0 -0
  39. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_south/.zarray +4 -4
  40. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_south/0.0.0 +0 -0
  41. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_west/.zarray +4 -4
  42. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/temp_west/0.0.0 +0 -0
  43. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_east/.zarray +4 -4
  44. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_east/0.0.0 +0 -0
  45. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_north/.zarray +4 -4
  46. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_north/0.0.0 +0 -0
  47. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_south/.zarray +4 -4
  48. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_south/0.0.0 +0 -0
  49. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_west/.zarray +4 -4
  50. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/u_west/0.0.0 +0 -0
  51. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_east/.zarray +4 -4
  52. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_east/0.0 +0 -0
  53. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_north/.zarray +4 -4
  54. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_north/0.0 +0 -0
  55. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_south/.zarray +4 -4
  56. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_south/0.0 +0 -0
  57. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_west/.zarray +4 -4
  58. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/ubar_west/0.0 +0 -0
  59. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_east/.zarray +4 -4
  60. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_east/0.0.0 +0 -0
  61. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_north/.zarray +4 -4
  62. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_north/0.0.0 +0 -0
  63. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_south/.zarray +4 -4
  64. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_south/0.0.0 +0 -0
  65. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_west/.zarray +4 -4
  66. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/v_west/0.0.0 +0 -0
  67. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_east/.zarray +4 -4
  68. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_east/0.0 +0 -0
  69. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_north/.zarray +4 -4
  70. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_north/0.0 +0 -0
  71. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_south/.zarray +4 -4
  72. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_south/0.0 +0 -0
  73. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_west/.zarray +4 -4
  74. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/vbar_west/0.0 +0 -0
  75. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_east/.zarray +4 -4
  76. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_east/.zattrs +8 -0
  77. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_east/0.0 +0 -0
  78. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_north/.zarray +4 -4
  79. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_north/.zattrs +8 -0
  80. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_north/0.0 +0 -0
  81. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_south/.zarray +4 -4
  82. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_south/.zattrs +8 -0
  83. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_south/0.0 +0 -0
  84. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_west/.zarray +4 -4
  85. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_west/.zattrs +8 -0
  86. roms_tools/tests/test_setup/test_data/boundary_forcing.zarr/zeta_west/0.0 +0 -0
  87. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/.zattrs +4 -4
  88. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/.zmetadata +4 -4
  89. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/angle/0.0 +0 -0
  90. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/angle_coarse/0.0 +0 -0
  91. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/f/0.0 +0 -0
  92. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/h/0.0 +0 -0
  93. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lat_coarse/0.0 +0 -0
  94. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lat_rho/0.0 +0 -0
  95. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lat_u/0.0 +0 -0
  96. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lat_v/0.0 +0 -0
  97. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lon_coarse/0.0 +0 -0
  98. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lon_rho/0.0 +0 -0
  99. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lon_u/0.0 +0 -0
  100. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/lon_v/0.0 +0 -0
  101. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_coarse/0.0 +0 -0
  102. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_rho/0.0 +0 -0
  103. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_u/0.0 +0 -0
  104. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/mask_v/0.0 +0 -0
  105. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/pm/0.0 +0 -0
  106. roms_tools/tests/test_setup/test_data/grid_that_straddles_dateline.zarr/pn/0.0 +0 -0
  107. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/.zattrs +2 -1
  108. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/.zmetadata +6 -4
  109. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/Cs_r/.zattrs +1 -1
  110. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/Cs_w/.zattrs +1 -1
  111. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/NH4/0.0.0.0 +0 -0
  112. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/NO3/0.0.0.0 +0 -0
  113. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/PO4/0.0.0.0 +0 -0
  114. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/abs_time/.zattrs +1 -0
  115. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/diatSi/0.0.0.0 +0 -0
  116. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/ocean_time/.zattrs +1 -1
  117. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spC/0.0.0.0 +0 -0
  118. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spCaCO3/0.0.0.0 +0 -0
  119. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/spFe/0.0.0.0 +0 -0
  120. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/temp/0.0.0.0 +0 -0
  121. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/u/0.0.0.0 +0 -0
  122. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/ubar/0.0.0 +0 -0
  123. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/v/0.0.0.0 +0 -0
  124. roms_tools/tests/test_setup/test_data/initial_conditions_with_bgc_from_climatology.zarr/vbar/0.0.0 +0 -0
  125. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/.zmetadata +30 -0
  126. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_location/.zarray +22 -0
  127. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_location/.zattrs +8 -0
  128. roms_tools/tests/test_setup/test_data/river_forcing_no_climatology.zarr/river_location/0.0 +0 -0
  129. roms_tools/tests/test_setup/test_data/river_forcing_with_bgc.zarr/.zmetadata +30 -0
  130. roms_tools/tests/test_setup/test_data/river_forcing_with_bgc.zarr/river_location/.zarray +22 -0
  131. roms_tools/tests/test_setup/test_data/river_forcing_with_bgc.zarr/river_location/.zattrs +8 -0
  132. roms_tools/tests/test_setup/test_data/river_forcing_with_bgc.zarr/river_location/0.0 +0 -0
  133. roms_tools/tests/test_setup/test_datasets.py +1 -1
  134. roms_tools/tests/test_setup/test_grid.py +1 -14
  135. roms_tools/tests/test_setup/test_initial_conditions.py +205 -67
  136. roms_tools/tests/test_setup/test_nesting.py +0 -16
  137. roms_tools/tests/test_setup/test_river_forcing.py +9 -37
  138. roms_tools/tests/test_setup/test_surface_forcing.py +103 -74
  139. roms_tools/tests/test_setup/test_tides.py +5 -17
  140. roms_tools/tests/test_setup/test_topography.py +1 -1
  141. roms_tools/tests/test_setup/test_utils.py +57 -1
  142. roms_tools/tests/{test_utils.py → test_tiling/test_partition.py} +1 -1
  143. roms_tools/tiling/partition.py +338 -0
  144. roms_tools/utils.py +310 -276
  145. roms_tools/vertical_coordinate.py +227 -0
  146. {roms_tools-2.2.1.dist-info → roms_tools-2.4.0.dist-info}/METADATA +1 -1
  147. {roms_tools-2.2.1.dist-info → roms_tools-2.4.0.dist-info}/RECORD +151 -142
  148. roms_tools/setup/vertical_coordinate.py +0 -109
  149. /roms_tools/{setup/regrid.py → regrid.py} +0 -0
  150. {roms_tools-2.2.1.dist-info → roms_tools-2.4.0.dist-info}/LICENSE +0 -0
  151. {roms_tools-2.2.1.dist-info → roms_tools-2.4.0.dist-info}/WHEEL +0 -0
  152. {roms_tools-2.2.1.dist-info → roms_tools-2.4.0.dist-info}/top_level.txt +0 -0
roms_tools/utils.py CHANGED
@@ -1,312 +1,323 @@
1
- from numbers import Integral
2
-
3
- import numpy as np
4
1
  import xarray as xr
5
- from typing import Union
6
2
  from pathlib import Path
3
+ import re
4
+ import glob
5
+ import logging
7
6
 
8
7
 
9
- def partition(
10
- ds: xr.Dataset, np_eta: int = 1, np_xi: int = 1
11
- ) -> tuple[list[int], list[xr.Dataset]]:
12
- """Partition a ROMS (Regional Ocean Modeling System) dataset into smaller spatial
13
- tiles.
14
-
15
- This function divides the input dataset into `np_eta` by `np_xi` tiles, where each tile
16
- represents a subdomain of the original dataset. The partitioning is performed along
17
- the spatial dimensions `eta_rho`, `xi_rho`, `eta_v`, `xi_u`, `eta_psi`, `xi_psi`, `eta_coarse`, and `xi_coarse`,
18
- depending on which dimensions are present in the dataset.
8
+ def _load_data(
9
+ filename,
10
+ dim_names,
11
+ use_dask,
12
+ time_chunking=True,
13
+ decode_times=True,
14
+ force_combine_nested=False,
15
+ ):
16
+ """Load dataset from the specified file.
19
17
 
20
18
  Parameters
21
19
  ----------
22
- ds : xr.Dataset
23
- The input ROMS dataset that is to be partitioned.
24
-
25
- np_eta : int, optional
26
- The number of partitions along the `eta` direction. Must be a positive integer. Default is 1.
27
-
28
- np_xi : int, optional
29
- The number of partitions along the `xi` direction. Must be a positive integer. Default is 1.
20
+ filename : Union[str, Path, List[Union[str, Path]]]
21
+ The path to the data file(s). Can be a single string (with or without wildcards), a single Path object,
22
+ or a list of strings or Path objects containing multiple files.
23
+ dim_names : Dict[str, str], optional
24
+ Dictionary specifying the names of dimensions in the dataset.
25
+ Required only for lat-lon datasets to map dimension names like "latitude" and "longitude".
26
+ For ROMS datasets, this parameter can be omitted, as default ROMS dimensions ("eta_rho", "xi_rho", "s_rho") are assumed.
27
+ use_dask: bool
28
+ Indicates whether to use dask for chunking. If True, data is loaded with dask; if False, data is loaded eagerly. This parameter has no default and must be supplied by the caller.
29
+ time_chunking : bool, optional
30
+ If True and `use_dask=True`, the data will be chunked along the time dimension with a chunk size of 1.
31
+ If False, the data will not be chunked explicitly along the time dimension, but will follow the default auto chunking scheme. This option is useful for ROMS restart files.
32
+ Defaults to True.
33
+ decode_times: bool, optional
34
+ If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers.
35
+ Defaults to True.
36
+ force_combine_nested: bool, optional
37
+ If True, forces the use of nested combination (`combine_nested`) regardless of whether wildcards are used.
38
+ Defaults to False.
30
39
 
31
40
  Returns
32
41
  -------
33
- tuple[list[int], list[xr.Dataset]]
34
- A tuple containing two elements:
35
-
36
- - A list of integers representing the file numbers associated with each partition.
37
- - A list of `xarray.Dataset` objects, each representing a partitioned subdomain of the original dataset.
42
+ ds : xr.Dataset
43
+ The loaded xarray Dataset containing the forcing data.
38
44
 
39
45
  Raises
40
46
  ------
47
+ FileNotFoundError
48
+ If the specified file does not exist.
41
49
  ValueError
42
- If `np_eta` or `np_xi` is not a positive integer, or if the dataset cannot be evenly partitioned
43
- into the specified number of tiles.
44
-
45
-
46
- Example
47
- -------
48
- >>> partitioned_file_numbers, partitioned_datasets = partition(
49
- ... ds, np_eta=2, np_xi=3
50
- ... )
51
- >>> print(partitioned_file_numbers)
52
- [0, 1, 2, 3, 4, 5]
53
- >>> print([ds.sizes for ds in partitioned_datasets])
54
- [{'eta_rho': 50, 'xi_rho': 50}, {'eta_rho': 50, 'xi_rho': 50}, ...]
55
-
56
- This example partitions the dataset into 2 tiles along the `eta` direction and 3 tiles
57
- along the `xi` direction, resulting in a total of 6 partitions.
50
+ If `filename` is not a string, a Path, or a list of strings/Paths, or if a list of files is provided but dim_names["time"] is not available.
58
51
  """
52
+ if dim_names is None:
53
+ dim_names = {}
59
54
 
60
- if (
61
- not isinstance(np_eta, Integral)
62
- or np_eta < 1
63
- or not isinstance(np_xi, Integral)
64
- or np_xi < 1
65
- ):
66
- raise ValueError("np_eta and np_xi must be positive integers")
67
-
68
- partitionable_dims_maybe_present = [
69
- "eta_rho",
70
- "xi_rho",
71
- "eta_v",
72
- "xi_u",
73
- "eta_psi",
74
- "xi_psi",
75
- "eta_coarse",
76
- "xi_coarse",
77
- ]
78
- dims_to_partition = [d for d in partitionable_dims_maybe_present if d in ds.dims]
79
-
80
- # if eta is periodic there are no ghost cells along those dimensions
81
- if "eta_v" in ds.sizes and ds.sizes["eta_rho"] == ds.sizes["eta_v"]:
82
- # TODO how are we supposed to know if eta is periodic if eta_v doesn't appear? partit.F doesn't say...
83
- n_eta_ghost_cells = 0
84
- else:
85
- n_eta_ghost_cells = 1
55
+ # Precompile the regex for matching wildcard characters
56
+ wildcard_regex = re.compile(r"[\*\?\[\]]")
86
57
 
87
- # if xi is periodic there are no ghost cells along those dimensions
88
- if "xi_u" in ds.sizes and ds.sizes["xi_rho"] == ds.sizes["xi_u"]:
89
- n_xi_ghost_cells = 0
58
+ # Convert Path objects to strings
59
+ if isinstance(filename, (str, Path)):
60
+ filename_str = str(filename)
61
+ elif isinstance(filename, list):
62
+ filename_str = [str(f) for f in filename]
90
63
  else:
91
- n_xi_ghost_cells = 1
92
-
93
- def integer_division_or_raise(a: int, b: int, dimension: str) -> int:
94
- """Perform integer division and ensure that the division is exact.
95
-
96
- Parameters
97
- ----------
98
- a : int
99
- The numerator for the division.
100
- b : int
101
- The denominator for the division.
102
- dimension : str
103
- The name of the dimension being partitioned, used for error reporting.
104
-
105
- Returns
106
- -------
107
- int
108
- The result of the integer division.
109
-
110
- Raises
111
- ------
112
- ValueError
113
- If the division is not exact, indicating that the domain cannot be evenly divided
114
- along the specified dimension.
115
- """
116
- remainder = a % b
117
- if remainder == 0:
118
- return a // b
64
+ raise ValueError("filename must be a string, Path, or a list of strings/Paths.")
65
+
66
+ # Handle the case when filename is a string
67
+ contains_wildcard = False
68
+ if isinstance(filename_str, str):
69
+ contains_wildcard = bool(wildcard_regex.search(filename_str))
70
+ if contains_wildcard:
71
+ matching_files = glob.glob(filename_str)
72
+ if not matching_files:
73
+ raise FileNotFoundError(
74
+ f"No files found matching the pattern '{filename_str}'."
75
+ )
119
76
  else:
120
- raise ValueError(
121
- f"Dimension '{dimension}' of size {a} cannot be evenly divided into {b} partitions."
122
- )
77
+ matching_files = [filename_str]
78
+
79
+ # Handle the case when filename is a list
80
+ elif isinstance(filename_str, list):
81
+ contains_wildcard = any(wildcard_regex.search(f) for f in filename_str)
82
+ if contains_wildcard:
83
+ matching_files = []
84
+ for f in filename_str:
85
+ files = glob.glob(f)
86
+ if not files:
87
+ raise FileNotFoundError(
88
+ f"No files found matching the pattern '{f}'."
89
+ )
90
+ matching_files.extend(files)
91
+ else:
92
+ matching_files = filename_str
123
93
 
124
- if "eta_rho" in dims_to_partition:
125
- eta_rho_domain_size = integer_division_or_raise(
126
- ds.sizes["eta_rho"] - 2 * n_eta_ghost_cells, np_eta, "eta_rho"
127
- )
94
+ # Sort the matching files
95
+ matching_files = sorted(matching_files)
128
96
 
129
- if "xi_rho" in dims_to_partition:
130
- xi_rho_domain_size = integer_division_or_raise(
131
- ds.sizes["xi_rho"] - 2 * n_xi_ghost_cells, np_xi, "xi_rho"
97
+ # Check if time dimension is available when multiple files are provided
98
+ if isinstance(filename_str, list) and "time" not in dim_names:
99
+ raise ValueError(
100
+ "A list of files is provided, but time dimension is not available. "
101
+ "A time dimension must be available to concatenate the files."
132
102
  )
133
103
 
134
- if "eta_v" in dims_to_partition:
135
- eta_v_domain_size = integer_division_or_raise(
136
- ds.sizes["eta_v"] - 1 * n_eta_ghost_cells, np_eta, "eta_v"
137
- )
104
+ # Determine the kwargs for combining datasets
105
+ if force_combine_nested:
106
+ kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
107
+ elif contains_wildcard or len(matching_files) == 1:
108
+ kwargs = {"combine": "by_coords"}
109
+ else:
110
+ kwargs = {"combine": "nested", "concat_dim": dim_names["time"]}
138
111
 
139
- if "xi_u" in dims_to_partition:
140
- xi_u_domain_size = integer_division_or_raise(
141
- ds.sizes["xi_u"] - 1 * n_xi_ghost_cells, np_xi, "xi_u"
142
- )
112
+ # Base kwargs used for dataset combination
113
+ combine_kwargs = {
114
+ "coords": "minimal",
115
+ "compat": "override",
116
+ "combine_attrs": "override",
117
+ }
143
118
 
144
- if "eta_psi" in dims_to_partition:
145
- eta_psi_domain_size = integer_division_or_raise(
146
- ds.sizes["eta_psi"] - 3 * n_eta_ghost_cells, np_eta, "eta_psi"
147
- )
119
+ if use_dask:
148
120
 
149
- if "xi_psi" in dims_to_partition:
150
- xi_psi_domain_size = integer_division_or_raise(
151
- ds.sizes["xi_psi"] - 3 * n_xi_ghost_cells, np_xi, "xi_psi"
121
+ if "latitude" in dim_names and "longitude" in dim_names:
122
+ # for lat-lon datasets
123
+ chunks = {
124
+ dim_names["latitude"]: -1,
125
+ dim_names["longitude"]: -1,
126
+ }
127
+ else:
128
+ # For ROMS datasets
129
+ chunks = {
130
+ "eta_rho": -1,
131
+ "eta_v": -1,
132
+ "xi_rho": -1,
133
+ "xi_u": -1,
134
+ "s_rho": -1,
135
+ }
136
+
137
+ if "depth" in dim_names:
138
+ chunks[dim_names["depth"]] = -1
139
+ if "time" in dim_names and time_chunking:
140
+ chunks[dim_names["time"]] = 1
141
+
142
+ ds = xr.open_mfdataset(
143
+ matching_files,
144
+ decode_times=decode_times,
145
+ chunks=chunks,
146
+ **combine_kwargs,
147
+ **kwargs,
152
148
  )
153
149
 
154
- if "eta_coarse" in dims_to_partition:
155
- eta_coarse_domain_size = integer_division_or_raise(
156
- ds.sizes["eta_coarse"] - 2 * n_eta_ghost_cells, np_eta, "eta_coarse"
157
- )
158
- if "xi_coarse" in dims_to_partition:
159
- xi_coarse_domain_size = integer_division_or_raise(
160
- ds.sizes["xi_coarse"] - 2 * n_xi_ghost_cells, np_xi, "xi_coarse"
150
+ # Rechunk the dataset along the tidal constituent dimension ("ntides") after loading
151
+ # because the original dataset does not have a chunk size of 1 along this dimension.
152
+ if "ntides" in dim_names:
153
+ ds = ds.chunk({dim_names["ntides"]: 1})
154
+
155
+ else:
156
+ ds_list = []
157
+ for file in matching_files:
158
+ ds = xr.open_dataset(file, decode_times=decode_times, chunks=None)
159
+ ds_list.append(ds)
160
+
161
+ if kwargs["combine"] == "by_coords":
162
+ ds = xr.combine_by_coords(ds_list, **combine_kwargs)
163
+ elif kwargs["combine"] == "nested":
164
+ ds = xr.combine_nested(
165
+ ds_list, concat_dim=kwargs["concat_dim"], **combine_kwargs
166
+ )
167
+
168
+ if "time" in dim_names and dim_names["time"] not in ds.dims:
169
+ ds = ds.expand_dims(dim_names["time"])
170
+
171
+ return ds
172
+
173
+
174
+ def interpolate_from_rho_to_u(field, method="additive"):
175
+ """Interpolates the given field from rho points to u points.
176
+
177
+ This function performs an interpolation from the rho grid (cell centers) to the u grid
178
+ (cell edges in the xi direction). Depending on the chosen method, it either averages
179
+ (additive) or multiplies (multiplicative) the field values between adjacent rho points
180
+ along the xi dimension. It also handles the removal of unnecessary coordinate variables
181
+ and updates the dimensions accordingly.
182
+
183
+ Parameters
184
+ ----------
185
+ field : xr.DataArray
186
+ The input data array on the rho grid to be interpolated. It is assumed to have a dimension
187
+ named "xi_rho".
188
+
189
+ method : str, optional, default='additive'
190
+ The method to use for interpolation. Options are:
191
+ - 'additive': Average the field values between adjacent rho points.
192
+ - 'multiplicative': Multiply the field values between adjacent rho points. Appropriate for
193
+ binary masks.
194
+
195
+ Returns
196
+ -------
197
+ field_interpolated : xr.DataArray
198
+ The interpolated data array on the u grid with the dimension "xi_u".
199
+ """
200
+
201
+ if method == "additive":
202
+ field_interpolated = 0.5 * (field + field.shift(xi_rho=1)).isel(
203
+ xi_rho=slice(1, None)
161
204
  )
205
+ elif method == "multiplicative":
206
+ field_interpolated = (field * field.shift(xi_rho=1)).isel(xi_rho=slice(1, None))
207
+ else:
208
+ raise NotImplementedError(f"Unsupported method '{method}' specified.")
162
209
 
163
- # unpartitioned dimensions should have sizes unchanged
164
- partitioned_sizes = {
165
- dim: [size] for dim, size in ds.sizes.items() if dim in dims_to_partition
166
- }
210
+ vars_to_drop = ["lat_rho", "lon_rho", "eta_rho", "xi_rho"]
211
+ for var in vars_to_drop:
212
+ if var in field_interpolated.coords:
213
+ field_interpolated = field_interpolated.drop_vars(var)
167
214
 
168
- # TODO refactor to use two functions for odd- and even-length dimensions
169
- if "eta_v" in dims_to_partition:
170
- partitioned_sizes["eta_v"] = [eta_v_domain_size] * (np_eta - 1) + [
171
- eta_v_domain_size + n_eta_ghost_cells
172
- ]
173
- if "xi_u" in dims_to_partition:
174
- partitioned_sizes["xi_u"] = [xi_u_domain_size] * (np_xi - 1) + [
175
- xi_u_domain_size + n_xi_ghost_cells
176
- ]
177
-
178
- if np_eta > 1:
179
- if "eta_rho" in dims_to_partition:
180
- partitioned_sizes["eta_rho"] = (
181
- [eta_rho_domain_size + n_eta_ghost_cells]
182
- + [eta_rho_domain_size] * (np_eta - 2)
183
- + [eta_rho_domain_size + n_eta_ghost_cells]
184
- )
185
- if "eta_psi" in dims_to_partition:
186
- partitioned_sizes["eta_psi"] = (
187
- [n_eta_ghost_cells + eta_psi_domain_size]
188
- + [eta_psi_domain_size] * (np_eta - 2)
189
- + [eta_psi_domain_size + 2 * n_eta_ghost_cells]
190
- )
191
- if "eta_coarse" in dims_to_partition:
192
- partitioned_sizes["eta_coarse"] = (
193
- [eta_coarse_domain_size + n_eta_ghost_cells]
194
- + [eta_coarse_domain_size] * (np_eta - 2)
195
- + [eta_coarse_domain_size + n_eta_ghost_cells]
196
- )
215
+ field_interpolated = field_interpolated.swap_dims({"xi_rho": "xi_u"})
197
216
 
198
- if np_xi > 1:
199
- if "xi_rho" in dims_to_partition:
200
- partitioned_sizes["xi_rho"] = (
201
- [xi_rho_domain_size + n_xi_ghost_cells]
202
- + [xi_rho_domain_size] * (np_xi - 2)
203
- + [xi_rho_domain_size + n_xi_ghost_cells]
204
- )
205
- if "xi_psi" in dims_to_partition:
206
- partitioned_sizes["xi_psi"] = (
207
- [n_xi_ghost_cells + xi_psi_domain_size]
208
- + [xi_psi_domain_size] * (np_xi - 2)
209
- + [xi_psi_domain_size + 2 * n_xi_ghost_cells]
210
- )
211
- if "xi_coarse" in dims_to_partition:
212
- partitioned_sizes["xi_coarse"] = (
213
- [xi_coarse_domain_size + n_xi_ghost_cells]
214
- + [xi_coarse_domain_size] * (np_xi - 2)
215
- + [xi_coarse_domain_size + n_xi_ghost_cells]
216
- )
217
+ return field_interpolated
217
218
 
218
- def cumsum(pmf):
219
- """Implementation of cumsum which ensures the result starts with zero."""
220
- cdf = np.empty(len(pmf) + 1, dtype=int)
221
- cdf[0] = 0
222
- np.cumsum(pmf, out=cdf[1:])
223
- return cdf
224
-
225
- file_numbers = []
226
- partitioned_datasets = []
227
- for i in range(np_eta):
228
- for j in range(np_xi):
229
- file_number = j + (i * np_xi)
230
- file_numbers.append(file_number)
231
-
232
- indexers = {}
233
-
234
- if "eta_rho" in dims_to_partition:
235
- eta_rho_partition_indices = cumsum(partitioned_sizes["eta_rho"])
236
- indexers["eta_rho"] = slice(
237
- int(eta_rho_partition_indices[i]),
238
- int(eta_rho_partition_indices[i + 1]),
239
- )
240
- if "xi_rho" in dims_to_partition:
241
- xi_rho_partition_indices = cumsum(partitioned_sizes["xi_rho"])
242
- indexers["xi_rho"] = slice(
243
- int(xi_rho_partition_indices[j]),
244
- int(xi_rho_partition_indices[j + 1]),
245
- )
246
219
 
247
- if "eta_v" in dims_to_partition:
248
- eta_v_partition_indices = cumsum(partitioned_sizes["eta_v"])
249
- indexers["eta_v"] = slice(
250
- int(eta_v_partition_indices[i]),
251
- int(eta_v_partition_indices[i + 1]),
252
- )
253
- if "xi_u" in dims_to_partition:
254
- xi_u_partition_indices = cumsum(partitioned_sizes["xi_u"])
255
- indexers["xi_u"] = slice(
256
- int(xi_u_partition_indices[j]), int(xi_u_partition_indices[j + 1])
257
- )
258
- if "eta_psi" in dims_to_partition:
259
- eta_psi_partition_indices = cumsum(partitioned_sizes["eta_psi"])
260
- indexers["eta_psi"] = slice(
261
- int(eta_psi_partition_indices[i]),
262
- int(eta_psi_partition_indices[i + 1]),
263
- )
264
- if "xi_psi" in dims_to_partition:
265
- xi_psi_partition_indices = cumsum(partitioned_sizes["xi_psi"])
266
- indexers["xi_psi"] = slice(
267
- int(xi_psi_partition_indices[j]),
268
- int(xi_psi_partition_indices[j + 1]),
269
- )
220
+ def interpolate_from_rho_to_v(field, method="additive"):
221
+ """Interpolates the given field from rho points to v points.
270
222
 
271
- if "eta_coarse" in dims_to_partition:
272
- eta_coarse_partition_indices = cumsum(partitioned_sizes["eta_coarse"])
273
- indexers["eta_coarse"] = slice(
274
- int(eta_coarse_partition_indices[i]),
275
- int(eta_coarse_partition_indices[i + 1]),
276
- )
223
+ This function performs an interpolation from the rho grid (cell centers) to the v grid
224
+ (cell edges in the eta direction). Depending on the chosen method, it either averages
225
+ (additive) or multiplies (multiplicative) the field values between adjacent rho points
226
+ along the eta dimension. It also handles the removal of unnecessary coordinate variables
227
+ and updates the dimensions accordingly.
277
228
 
278
- if "xi_coarse" in dims_to_partition:
279
- xi_coarse_partition_indices = cumsum(partitioned_sizes["xi_coarse"])
280
- indexers["xi_coarse"] = slice(
281
- int(xi_coarse_partition_indices[j]),
282
- int(xi_coarse_partition_indices[j + 1]),
283
- )
229
+ Parameters
230
+ ----------
231
+ field : xr.DataArray
232
+ The input data array on the rho grid to be interpolated. It is assumed to have a dimension
233
+ named "eta_rho".
284
234
 
285
- partitioned_ds = ds.isel(**indexers)
235
+ method : str, optional, default='additive'
236
+ The method to use for interpolation. Options are:
237
+ - 'additive': Average the field values between adjacent rho points.
238
+ - 'multiplicative': Multiply the field values between adjacent rho points. Appropriate for
239
+ binary masks.
286
240
 
287
- partitioned_datasets.append(partitioned_ds)
241
+ Returns
242
+ -------
243
+ field_interpolated : xr.DataArray
244
+ The interpolated data array on the v grid with the dimension "eta_v".
245
+ """
288
246
 
289
- return file_numbers, partitioned_datasets
247
+ if method == "additive":
248
+ field_interpolated = 0.5 * (field + field.shift(eta_rho=1)).isel(
249
+ eta_rho=slice(1, None)
250
+ )
251
+ elif method == "multiplicative":
252
+ field_interpolated = (field * field.shift(eta_rho=1)).isel(
253
+ eta_rho=slice(1, None)
254
+ )
255
+ else:
256
+ raise NotImplementedError(f"Unsupported method '{method}' specified.")
290
257
 
258
+ vars_to_drop = ["lat_rho", "lon_rho", "eta_rho", "xi_rho"]
259
+ for var in vars_to_drop:
260
+ if var in field_interpolated.coords:
261
+ field_interpolated = field_interpolated.drop_vars(var)
291
262
 
292
- def partition_netcdf(
293
- filepath: Union[str, Path], np_eta: int = 1, np_xi: int = 1
294
- ) -> None:
295
- """Partition a ROMS NetCDF file into smaller spatial tiles and save them to disk.
263
+ field_interpolated = field_interpolated.swap_dims({"eta_rho": "eta_v"})
296
264
 
297
- This function divides the dataset in the specified NetCDF file into `np_eta` by `np_xi` tiles.
298
- Each tile is saved as a separate NetCDF file.
265
+ return field_interpolated
266
+
267
+
268
+ def transpose_dimensions(da: xr.DataArray) -> xr.DataArray:
269
+ """Transpose the dimensions of an xarray.DataArray to ensure that 'time', any
270
+ dimension starting with 's_', 'eta_', and 'xi_' are ordered first, followed by the
271
+ remaining dimensions in their original order.
299
272
 
300
273
  Parameters
301
274
  ----------
302
- filepath : Union[str, Path]
303
- The path to the input NetCDF file.
275
+ da : xarray.DataArray
276
+ The input DataArray whose dimensions are to be reordered.
304
277
 
305
- np_eta : int, optional
306
- The number of partitions along the `eta` direction. Must be a positive integer. Default is 1.
278
+ Returns
279
+ -------
280
+ xarray.DataArray
281
+ The DataArray with dimensions reordered so that 'time', 's_*', 'eta_*',
282
+ and 'xi_*' are first, in that order, if they exist.
283
+ """
307
284
 
308
- np_xi : int, optional
309
- The number of partitions along the `xi` direction. Must be a positive integer. Default is 1.
285
+ # List of preferred dimension patterns
286
+ preferred_order = ["time", "s_", "eta_", "xi_"]
287
+
288
+ # Get the existing dimensions in the DataArray
289
+ dims = list(da.dims)
290
+
291
+ # Collect dimensions that match any of the preferred patterns
292
+ matched_dims = []
293
+ for pattern in preferred_order:
294
+ # Find dimensions that start with the pattern
295
+ matched_dims += [dim for dim in dims if dim.startswith(pattern)]
296
+
297
+ # Create a new order: first the matched dimensions, then the rest
298
+ remaining_dims = [dim for dim in dims if dim not in matched_dims]
299
+ new_order = matched_dims + remaining_dims
300
+
301
+ # Transpose the DataArray to the new order
302
+ transposed_da = da.transpose(*new_order)
303
+
304
+ return transposed_da
305
+
306
+
307
def save_datasets(dataset_list, output_filenames, use_dask=False, verbose=True):
    """Write each dataset to its own NetCDF file and return the written paths.

    The ``.nc`` extension is appended to every entry of ``output_filenames``
    before writing, so the names are expected without that extension.

    Parameters
    ----------
    dataset_list : list
        Datasets to be written.
    output_filenames : list
        Filenames for the output files; ``.nc`` is appended to each one.
    use_dask : bool, optional
        If True, the write is wrapped in a Dask ``ProgressBar`` context.
        Defaults to False.
    verbose : bool, optional
        If True, the final filenames are logged at INFO level before writing.
        Defaults to True.

    Returns
    -------
    list of Path
        A list of Path objects for the filenames that were saved, in the
        order of ``output_filenames``.
    """
    nc_filenames = [f"{name}.nc" for name in output_filenames]

    if verbose:
        listing = "\n".join(nc_filenames)
        logging.info("Writing the following NetCDF files:\n%s", listing)

    if not use_dask:
        xr.save_mfdataset(dataset_list, nc_filenames)
    else:
        # Imported here so dask.diagnostics is only loaded when requested.
        from dask.diagnostics import ProgressBar

        with ProgressBar():
            xr.save_mfdataset(dataset_list, nc_filenames)

    return [Path(name) for name in nc_filenames]
325
347
 
326
- # Generate paths to the partitioned files
327
- base_filepath = filepath.with_suffix("")
328
- paths_to_partitioned_files = [
329
- Path(f"{base_filepath}.{file_number}.nc") for file_number in file_numbers
330
- ]
331
348
 
332
- # Save the partitioned datasets to files
333
- xr.save_mfdataset(partitioned_datasets, paths_to_partitioned_files)
349
def get_dask_chunks(location, chunk_size):
    """Build the Dask chunking dictionary for a grid location.

    Parameters
    ----------
    location : str
        The grid location, one of "rho", "u", or "v". Any other value
        yields an empty dictionary.
    chunk_size : int
        The chunk size applied to both dimensions of the location.

    Returns
    -------
    dict
        Mapping from the two dimension names associated with ``location``
        to ``chunk_size``, or ``{}`` if the location is not recognized.
    """
    # Dimension names (eta first, then xi) for each supported location.
    dims_by_location = {
        "rho": ("eta_rho", "xi_rho"),
        "u": ("eta_rho", "xi_u"),
        "v": ("eta_v", "xi_rho"),
    }
    dim_names = dims_by_location.get(location, ())
    return {dim: chunk_size for dim in dim_names}