disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. disdrodb/__init__.py +1 -1
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/io.py +12 -2
  4. disdrodb/data_transfer/download_data.py +145 -14
  5. disdrodb/l0/check_standards.py +15 -10
  6. disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
  7. disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
  8. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
  9. disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
  10. disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
  11. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  12. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  13. disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
  14. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
  15. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
  16. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
  17. disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
  18. disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
  19. disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
  20. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
  21. disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
  22. disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
  23. disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
  24. disdrodb/l0/l0b_nc_processing.py +1 -1
  25. disdrodb/l0/l0b_processing.py +12 -10
  26. disdrodb/l0/manuals/SWS250.pdf +0 -0
  27. disdrodb/l0/manuals/VPF730.pdf +0 -0
  28. disdrodb/l0/manuals/VPF750.pdf +0 -0
  29. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
  30. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
  31. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
  32. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
  33. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
  34. disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
  35. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
  36. disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
  37. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
  38. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
  39. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  40. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  41. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
  42. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
  43. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
  44. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
  45. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  46. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  47. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
  48. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
  49. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
  50. disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
  51. disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
  52. disdrodb/l0/standards.py +7 -4
  53. disdrodb/l0/template_tools.py +2 -2
  54. disdrodb/l1/encoding_attrs.py +30 -8
  55. disdrodb/l1/processing.py +6 -4
  56. disdrodb/l1/resampling.py +1 -1
  57. disdrodb/l1/routines.py +9 -7
  58. disdrodb/l2/empirical_dsd.py +100 -2
  59. disdrodb/l2/event.py +3 -3
  60. disdrodb/l2/processing.py +21 -12
  61. disdrodb/l2/processing_options.py +7 -7
  62. disdrodb/l2/routines.py +3 -3
  63. disdrodb/metadata/checks.py +15 -6
  64. disdrodb/metadata/manipulation.py +2 -2
  65. disdrodb/metadata/standards.py +83 -79
  66. disdrodb/metadata/writer.py +2 -2
  67. disdrodb/routines.py +246 -10
  68. disdrodb/scattering/routines.py +1 -1
  69. disdrodb/utils/dataframe.py +342 -0
  70. disdrodb/utils/directories.py +14 -2
  71. disdrodb/utils/xarray.py +83 -0
  72. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
  73. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
  74. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
  75. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
  76. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
  77. {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
disdrodb/utils/dataframe.py ADDED
@@ -0,0 +1,342 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Dataframe utilities."""
+import numpy as np
+import pandas as pd
+
+
+def log_arange(start, stop, log_step=0.1, base=10):
+    """
+    Return numbers spaced evenly on a log scale (similar to np.arange but in log space).
+
+    Parameters
+    ----------
+    start : float
+        The starting value of the sequence (must be > 0).
+    stop : float
+        The end value of the sequence (must be > 0).
+    log_step : float
+        The step size in log-space (default is 0.1).
+    base : float
+        The logarithmic base (default is 10).
+
+    Returns
+    -------
+    np.ndarray
+        Array of values spaced in log scale.
+    """
+    if start <= 0 or stop <= 0:
+        raise ValueError("Both start and stop must be > 0 for log spacing.")
+
+    log_start = np.log(start) / np.log(base)
+    log_stop = np.log(stop) / np.log(base)
+
+    log_values = np.arange(log_start, log_stop, log_step)
+    return base**log_values
+
+
+def compute_1d_histogram(df, column, variables=None, bins=10, labels=None, prefix_name=True, include_quantiles=False):
+    """Compute conditional univariate statistics.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe.
+    column : str
+        Column name to be binned.
+    variables : str or list, optional
+        Column names for which conditional statistics will be computed.
+        If None, only counts are computed.
+    bins : int or array-like
+        Number of bins or bin edges.
+    labels : array-like, optional
+        Labels for the column bins. If None, uses bin centers.
+
+    Returns
+    -------
+    pandas.DataFrame
+    """
+    # Copy data
+    df = df.copy()
+
+    # Ensure `variables` is a list of variables
+    # - If no variable is specified, create a dummy variable
+    if variables is None:
+        variables = ["dummy"]
+        df["dummy"] = np.ones(df[column].shape)
+        variables_specified = False
+    elif isinstance(variables, str):
+        variables = [variables]
+        variables_specified = True
+    elif isinstance(variables, list):
+        variables_specified = True
+    else:
+        raise TypeError("`variables` must be a string, list of strings, or None.")
+    variables = np.unique(variables)
+
+    # Handle column binning
+    if isinstance(bins, int):
+        bins = np.linspace(df[column].min(), df[column].max(), bins + 1)
+
+    # Drop rows where any of the key columns have NaN
+    df = df.dropna(subset=[column, *variables])
+
+    if len(df) == 0:
+        raise ValueError("No valid data points after removing NaN values")
+
+    # Create binned columns with explicit handling of out-of-bounds values
+    df[f"{column}_binned"] = pd.cut(df[column], bins=bins, include_lowest=True)
+
+    # Create complete IntervalIndex for both dimensions
+    intervals = df[f"{column}_binned"].cat.categories
+
+    # Create IntervalIndex with all possible combinations
+    full_index = pd.Index(intervals, name=f"{column}_binned")
+
+    # Define grouping object
+    df_grouped = df.groupby([f"{column}_binned"], observed=False)
+
+    # Compute statistics for specified variables
+    variables_stats = []
+    for i, var in enumerate(variables):
+        # Prepare prefix
+        prefix = f"{var}_" if prefix_name and variables_specified else ""
+
+        # Define statistics to compute
+        if variables_specified:
+            # Compute quantiles
+            quantiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]
+            df_stats_quantiles = df_grouped[var].quantile(quantiles).unstack(level=-1)  # noqa: PD010
+            df_stats_quantiles.columns = [f"{prefix}Q{int(q*100)}" for q in df_stats_quantiles.columns]
+            df_stats_quantiles = df_stats_quantiles.rename(
+                columns={
+                    f"{prefix}Q50": f"{prefix}median",
+                },
+            )
+            # Define other stats to compute
+            list_stats = [
+                (f"{prefix}std", "std"),
+                (f"{prefix}min", "min"),
+                (f"{prefix}max", "max"),
+                (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            ]
+            if i == 0:
+                list_stats.append(("count", "count"))
+        else:
+            list_stats = [("count", "count")]
+
+        # Compute statistics
+        df_stats = df_grouped[var].agg(list_stats)
+
+        # Compute other variable statistics
+        if variables_specified:
+            df_stats[f"{prefix}range"] = df_stats[f"{prefix}max"] - df_stats[f"{prefix}min"]
+            df_stats[f"{prefix}iqr"] = df_stats_quantiles[f"{prefix}Q75"] - df_stats_quantiles[f"{prefix}Q25"]
+            df_stats[f"{prefix}ipr80"] = df_stats_quantiles[f"{prefix}Q90"] - df_stats_quantiles[f"{prefix}Q10"]
+            df_stats[f"{prefix}ipr90"] = df_stats_quantiles[f"{prefix}Q95"] - df_stats_quantiles[f"{prefix}Q5"]
+            df_stats[f"{prefix}ipr98"] = df_stats_quantiles[f"{prefix}Q99"] - df_stats_quantiles[f"{prefix}Q1"]
+            if include_quantiles:
+                df_stats = pd.concat((df_stats, df_stats_quantiles), axis=1)
+            else:
+                df_stats[f"{prefix}median"] = df_stats_quantiles[f"{prefix}median"]
+        variables_stats.append(df_stats)
+
+    # Combine all statistics into a single DataFrame
+    df_stats = pd.concat(variables_stats, axis=1)
+
+    # Reindex to include all interval combinations
+    df_stats = df_stats.reindex(full_index)
+
+    # Determine bin centers
+    centers = intervals.mid
+
+    # Use provided labels if available
+    coords = labels if labels is not None else centers
+
+    # Reset index and add coordinates/labels
+    df_stats = df_stats.reset_index()
+    df_stats[f"{column}"] = pd.Categorical(df_stats[f"{column}_binned"].map(dict(zip(intervals, coords, strict=False))))
+    df_stats = df_stats.drop(columns=f"{column}_binned")
+
+    return df_stats
+
+
+def compute_2d_histogram(
+    df,
+    x,
+    y,
+    variables=None,
+    x_bins=10,
+    y_bins=10,
+    x_labels=None,
+    y_labels=None,
+    prefix_name=True,
+    include_quantiles=False,
+):
+    """Compute conditional bivariate statistics.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe.
+    x : str
+        Column name for x-axis binning.
+    y : str
+        Column name for y-axis binning.
+    variables : str or list, optional
+        Column names for which statistics will be computed.
+        If None, only counts are computed.
+    x_bins : int or array-like
+        Number of bins or bin edges for x.
+    y_bins : int or array-like
+        Number of bins or bin edges for y.
+    x_labels : array-like, optional
+        Labels for x bins. If None, uses bin centers.
+    y_labels : array-like, optional
+        Labels for y bins. If None, uses bin centers.
+
+    Returns
+    -------
+    xarray.Dataset
+        Dataset with dimensions corresponding to the binned variables and
+        data variables for each statistic.
+    """
+    # # If polars, cast to pandas
+    # if isinstance(df, pl.DataFrame):
+    #     df = df.to_pandas()
+
+    # Copy data
+    df = df.copy()
+
+    # Ensure `variables` is a list of variables
+    # - If no variable is specified, create a dummy variable
+    if variables is None:
+        variables = ["dummy"]
+        df["dummy"] = np.ones(df[x].shape)
+        variables_specified = False
+    elif isinstance(variables, str):
+        variables = [variables]
+        variables_specified = True
+    elif isinstance(variables, list):
+        variables_specified = True
+    else:
+        raise TypeError("`variables` must be a string, list of strings, or None.")
+    variables = np.unique(variables)
+
+    # Handle x-axis binning
+    if isinstance(x_bins, int):
+        x_bins = np.linspace(df[x].min(), df[x].max(), x_bins + 1)
+    # Handle y-axis binning
+    if isinstance(y_bins, int):
+        y_bins = np.linspace(df[y].min(), df[y].max(), y_bins + 1)
+
+    # Drop rows where any of the key columns have NaN
+    df = df.dropna(subset=[x, y, *variables])
+
+    if len(df) == 0:
+        raise ValueError("No valid data points after removing NaN values")
+
+    # Create binned columns with explicit handling of out-of-bounds values
+    df[f"{x}_binned"] = pd.cut(df[x], bins=x_bins, include_lowest=True)
+    df[f"{y}_binned"] = pd.cut(df[y], bins=y_bins, include_lowest=True)
+
+    # Create complete IntervalIndex for both dimensions
+    x_intervals = df[f"{x}_binned"].cat.categories
+    y_intervals = df[f"{y}_binned"].cat.categories
+
+    # Create MultiIndex with all possible combinations
+    full_index = pd.MultiIndex.from_product([x_intervals, y_intervals], names=[f"{x}_binned", f"{y}_binned"])
+
+    # Define grouping object
+    df_grouped = df.groupby([f"{x}_binned", f"{y}_binned"], observed=False)
+
+    # Compute statistics for specified variables
+    variables_stats = []
+    for i, var in enumerate(variables):
+        # Prepare prefix
+        prefix = f"{var}_" if prefix_name and variables_specified else ""
+
+        # Define statistics to compute
+        if variables_specified:
+            # Compute quantiles
+            quantiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]
+            df_stats_quantiles = df_grouped[var].quantile(quantiles).unstack(level=-1)  # noqa: PD010
+            df_stats_quantiles.columns = [f"{prefix}Q{int(q*100)}" for q in df_stats_quantiles.columns]
+            df_stats_quantiles = df_stats_quantiles.rename(
+                columns={
+                    f"{prefix}Q50": f"{prefix}median",
+                },
+            )
+            # Define other stats to compute
+            list_stats = [
+                (f"{prefix}std", "std"),
+                (f"{prefix}min", "min"),
+                (f"{prefix}max", "max"),
+                (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            ]
+            if i == 0:
+                list_stats.append(("count", "count"))
+        else:
+            list_stats = [("count", "count")]
+
+        # Compute statistics
+        df_stats = df_grouped[var].agg(list_stats)
+
+        # Compute other variable statistics
+        if variables_specified:
+            df_stats[f"{prefix}range"] = df_stats[f"{prefix}max"] - df_stats[f"{prefix}min"]
+            df_stats[f"{prefix}iqr"] = df_stats_quantiles[f"{prefix}Q75"] - df_stats_quantiles[f"{prefix}Q25"]
+            df_stats[f"{prefix}ipr80"] = df_stats_quantiles[f"{prefix}Q90"] - df_stats_quantiles[f"{prefix}Q10"]
+            df_stats[f"{prefix}ipr90"] = df_stats_quantiles[f"{prefix}Q95"] - df_stats_quantiles[f"{prefix}Q5"]
+            df_stats[f"{prefix}ipr98"] = df_stats_quantiles[f"{prefix}Q99"] - df_stats_quantiles[f"{prefix}Q1"]
+            if include_quantiles:
+                df_stats = pd.concat((df_stats, df_stats_quantiles), axis=1)
+            else:
+                df_stats[f"{prefix}median"] = df_stats_quantiles[f"{prefix}median"]
+        variables_stats.append(df_stats)
+
+    # Combine all statistics into a single DataFrame
+    df_stats = pd.concat(variables_stats, axis=1)
+
+    # Reindex to include all interval combinations
+    df_stats = df_stats.reindex(full_index)
+
+    # Determine coordinates
+    x_centers = x_intervals.mid
+    y_centers = y_intervals.mid
+
+    # Use provided labels if available
+    x_coords = x_labels if x_labels is not None else x_centers
+    y_coords = y_labels if y_labels is not None else y_centers
+
+    # Reset index and set new coordinates
+    df_stats = df_stats.reset_index()
+    df_stats[f"{x}"] = pd.Categorical(df_stats[f"{x}_binned"].map(dict(zip(x_intervals, x_coords, strict=False))))
+    df_stats[f"{y}"] = pd.Categorical(df_stats[f"{y}_binned"].map(dict(zip(y_intervals, y_coords, strict=False))))
+
+    # Set new MultiIndex with coordinates
+    df_stats = df_stats.set_index([f"{x}", f"{y}"])
+    df_stats = df_stats.drop(columns=[f"{x}_binned", f"{y}_binned"])
+
+    # Convert to dataset
+    ds = df_stats.to_xarray()
+
+    # Transpose arrays
+    ds = ds.transpose(y, x)
+    return ds
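
For context, a minimal usage sketch of the new disdrodb.utils.dataframe helpers added above (the sample DataFrame and the column names rain_rate, reflectivity and mean_diameter are hypothetical, not taken from the package):

    import numpy as np
    import pandas as pd

    from disdrodb.utils.dataframe import compute_2d_histogram, log_arange

    # Hypothetical sample data
    rng = np.random.default_rng(0)
    df = pd.DataFrame(
        {
            "rain_rate": rng.gamma(2, 2, 1000),          # mm/h
            "reflectivity": rng.uniform(10, 50, 1000),   # dBZ
            "mean_diameter": rng.uniform(0.5, 3, 1000),  # mm
        },
    )

    # Log-spaced bin edges between 0.1 and 100 mm/h (base**arange in log space)
    rain_bins = log_arange(0.1, 100, log_step=0.2)

    # Conditional statistics of mean_diameter over (rain_rate, reflectivity) bins
    ds = compute_2d_histogram(
        df,
        x="rain_rate",
        y="reflectivity",
        variables="mean_diameter",
        x_bins=rain_bins,
        y_bins=20,
    )
    print(ds["count"])                 # samples per 2D bin
    print(ds["mean_diameter_median"])  # conditional median per bin

Because include_quantiles defaults to False, the returned dataset carries the count plus std, min, max, mad, range, inter-percentile ranges and median for each requested variable.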
disdrodb/utils/directories.py CHANGED
@@ -17,12 +17,12 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Define utilities for Directory/File Checks/Creation/Deletion."""
-
 import glob
 import logging
 import os
 import pathlib
 import shutil
+import subprocess
 from typing import Union
 
 from disdrodb.utils.list import flatten_list
@@ -207,10 +207,22 @@ def _remove_file_or_directories(path, logger=None):
         log_info(logger, msg=f"Deleted the empty directory {path}")
     # If not empty directory
     else:
-        shutil.rmtree(path)
+        # If not Windows, use shutil.rmtree
+        if os.name != "nt":  # Check if not Windows
+            shutil.rmtree(path)
+        else:
+            rmtree_windows(path)
         log_info(logger, msg=f"Deleted directories within {path}")
 
 
+def rmtree_windows(path):
+    """Remove a directory tree on Windows."""
+    if not os.path.isdir(path):
+        raise FileNotFoundError(f"{path!r} is not a valid directory")
+    # Use rd (alias rmdir) with /S (remove all subdirectories/files) and /Q (quiet)
+    subprocess.check_call(["cmd", "/c", "rd", "/S", "/Q", path])
+
+
 def remove_if_exists(path: str, force: bool = False, logger=None) -> None:
     """Remove file or directory if exists and ``force=True``.
 
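The patch above makes directory removal dispatch on the operating system: POSIX systems keep using shutil.rmtree, while Windows shells out to rd /S /Q through cmd. A standalone sketch of the same dispatch (the target path is hypothetical):

    import os
    import shutil
    import subprocess

    target = "C:/tmp/DISDRODB/some_station_dir"  # hypothetical directory

    if os.name != "nt":
        shutil.rmtree(target)
    else:
        # rd /S deletes the whole tree, /Q suppresses the confirmation prompt
        subprocess.check_call(["cmd", "/c", "rd", "/S", "/Q", target])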
disdrodb/utils/xarray.py CHANGED
@@ -97,6 +97,89 @@ def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
     return last_idx
 
 
+####-------------------------------------------------------------------
+#### Unstacking dimension
+
+
+def _check_coord_handling(coord_handling):
+    if coord_handling not in {"keep", "drop", "unstack"}:
+        raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
+
+
+def _unstack_coordinates(xr_obj, dim, prefix, suffix):
+    # Identify coordinates that share the target dimension
+    coords_with_dim = _get_non_dimensional_coordinates(xr_obj, dim=dim)
+    ds = xr.Dataset()
+    for coord_name in coords_with_dim:
+        coord_da = xr_obj[coord_name]
+        # Split the coordinate DataArray along the target dimension, drop coordinate and merge
+        split_ds = unstack_datarray_dimension(coord_da, coord_handling="drop", dim=dim, prefix=prefix, suffix=suffix)
+        ds.update(split_ds)
+    return ds
+
+
+def _handle_unstack_non_dim_coords(ds, source_xr_obj, coord_handling, dim, prefix, suffix):
+    # Deal with coordinates sharing the target dimension
+    if coord_handling == "keep":
+        return ds
+    if coord_handling == "unstack":
+        ds_coords = _unstack_coordinates(source_xr_obj, dim=dim, prefix=prefix, suffix=suffix)
+        ds.update(ds_coords)
+    # Remove non-dimensional coordinates ('unstack' and 'drop' coord_handling)
+    ds = ds.drop_vars(_get_non_dimensional_coordinates(ds, dim=dim))
+    return ds
+
+
+def _get_non_dimensional_coordinates(xr_obj, dim):
+    return [coord_name for coord_name, coord_da in xr_obj.coords.items() if dim in coord_da.dims and coord_name != dim]
+
+
+def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix=""):
+    """
+    Split a DataArray along a specified dimension into a Dataset with separate prefixed and suffixed variables.
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        The DataArray to split.
+    dim : str
+        The dimension along which to split the DataArray.
+    coord_handling : str, optional
+        Option to handle coordinates sharing the target dimension.
+        Choices are 'keep', 'drop', or 'unstack'. Defaults to 'keep'.
+    prefix : str, optional
+        String to prepend to each new variable name.
+    suffix : str, optional
+        String to append to each new variable name.
+
+    Returns
+    -------
+    xarray.Dataset
+        A Dataset with each variable split along the specified dimension.
+        The Dataset variables are named "{prefix}{name}{suffix}{dim_value}".
+        Coordinates sharing the target dimension are handled based on `coord_handling`.
+    """
+    # Retrieve DataArray name
+    name = da.name
+    # Unstack variables
+    ds = da.to_dataset(dim=dim)
+    rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}
+    ds = ds.rename_vars(rename_dict)
+    # Deal with coordinates sharing the target dimension
+    return _handle_unstack_non_dim_coords(
+        ds=ds,
+        source_xr_obj=da,
+        coord_handling=coord_handling,
+        dim=dim,
+        prefix=prefix,
+        suffix=suffix,
+    )
+
+
+####--------------------------------------------------------------------------
+#### Fill Values Utilities
+
+
 def define_dataarray_fill_value(da):
     """Define the fill value for a numerical xarray.DataArray."""
     if np.issubdtype(da.dtype, np.floating):
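
A minimal usage sketch of the new unstack_datarray_dimension helper (the radar_frequency dimension, its values and the variable name Z are hypothetical):

    import numpy as np
    import xarray as xr

    from disdrodb.utils.xarray import unstack_datarray_dimension

    # Hypothetical DataArray with a 'radar_frequency' dimension
    da = xr.DataArray(
        np.random.rand(3, 2),
        dims=("time", "radar_frequency"),
        coords={"radar_frequency": ["X", "Ku"]},
        name="Z",
    )

    # Split 'radar_frequency' into separate variables named Z_X and Z_Ku
    ds = unstack_datarray_dimension(da, dim="radar_frequency", suffix="_")
    print(list(ds.data_vars))  # ['Z_X', 'Z_Ku']

With the default coord_handling="keep", other coordinates defined on the unstacked dimension stay on the resulting dataset; "drop" removes them and "unstack" splits them into variables as well.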