climarraykit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climarraykit/__init__.py +12 -0
- climarraykit/conversions.py +208 -0
- climarraykit/data_manipulation.py +386 -0
- climarraykit/file_utils.py +309 -0
- climarraykit/patterns.py +616 -0
- climarraykit/xarray_obj_handler.py +575 -0
- climarraykit-0.2.0.dist-info/METADATA +86 -0
- climarraykit-0.2.0.dist-info/RECORD +11 -0
- climarraykit-0.2.0.dist-info/WHEEL +5 -0
- climarraykit-0.2.0.dist-info/licenses/LICENSE +21 -0
- climarraykit-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,575 @@
|
|
|
1
|
+
#! /usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
#----------------#
|
|
5
|
+
# Import modules #
|
|
6
|
+
#----------------#
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
import xarray as xr
|
|
11
|
+
|
|
12
|
+
#------------------------#
|
|
13
|
+
# Import project modules #
|
|
14
|
+
#------------------------#
|
|
15
|
+
|
|
16
|
+
from filewise.pandas_utils.pandas_obj_handler import save2csv
|
|
17
|
+
from climarraykit.patterns import find_coordinate_variables
|
|
18
|
+
from pygenutils.arrays_and_lists.data_manipulation import flatten_list
|
|
19
|
+
from pygenutils.strings.string_handler import append_ext, get_obj_specs
|
|
20
|
+
from pygenutils.time_handling.date_and_time_utils import find_dt_key
|
|
21
|
+
|
|
22
|
+
#-------------------------#
|
|
23
|
+
# Define custom functions #
|
|
24
|
+
#-------------------------#
|
|
25
|
+
|
|
26
|
+
# Main functions #
|
|
27
|
+
#----------------#
|
|
28
|
+
|
|
29
|
+
# xarray objects #
|
|
30
|
+
#~~~~~~~~~~~~~~~~#
|
|
31
|
+
|
|
32
|
+
def create_ds_component(var_name: str,
                        data_array: xr.DataArray | list | tuple,
                        dimlist: list[str],
                        dim_dict: dict[str, xr.DataArray | list | tuple],
                        attrs_dict: dict[str, str | int | float]) -> dict[str, xr.DataArray]:
    """
    Create an xarray.DataArray component to be added to an xarray.Dataset.

    Parameters
    ----------
    var_name : str
        Name of the variable represented by the DataArray.
    data_array : xarray.DataArray or array-like
        The array containing data to be stored in the DataArray.
    dimlist : list[str]
        List of dimension names corresponding to the dimensions of the data.
    dim_dict : dict
        Dictionary mapping dimension names to coordinate arrays.
    attrs_dict : dict
        Dictionary of attributes describing the DataArray (e.g., units, description).

    Returns
    -------
    data_array_dict : dict
        A single-entry dictionary mapping ``var_name`` to the new DataArray,
        ready to be merged into (or used to extend) an xarray.Dataset.

    Raises
    ------
    ValueError
        If var_name is empty, dimlist is empty, the dictionaries are invalid,
        or a dimension in dimlist has no matching entry in dim_dict.
    RuntimeError
        If the underlying xr.DataArray construction fails.

    Notes
    -----
    - The returned dictionary can be used to construct or extend an xarray.Dataset.
    """
    # Parameter validation
    if not isinstance(var_name, str) or not var_name.strip():
        raise ValueError("var_name must be a non-empty string")

    if not isinstance(dimlist, list) or not dimlist:
        raise ValueError("dimlist must be a non-empty list")

    if not all(isinstance(dim, str) and dim.strip() for dim in dimlist):
        raise ValueError("All dimension names must be non-empty strings")

    if not isinstance(dim_dict, dict) or not dim_dict:
        raise ValueError("dim_dict must be a non-empty dictionary")

    if not isinstance(attrs_dict, dict):
        raise ValueError("attrs_dict must be a dictionary")

    # Every dimension named in dimlist needs a coordinate entry in dim_dict;
    # failing early here gives a clearer message than xr.DataArray would.
    missing_dims = set(dimlist) - set(dim_dict.keys())
    if missing_dims:
        raise ValueError(f"Missing dimensions in dim_dict: {missing_dims}")

    try:
        data_array_dict = {
            var_name: xr.DataArray(
                data=data_array,
                dims=dimlist,
                coords=dim_dict,
                attrs=attrs_dict,
            )
        }

        return data_array_dict
    except Exception as e:
        # Chain the original exception so the root cause is preserved in tracebacks.
        raise RuntimeError(f"Failed to create DataArray component: {e}") from e
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# netCDF files #
|
|
106
|
+
#~~~~~~~~~~~~~~#
|
|
107
|
+
|
|
108
|
+
def save2nc(file_name: str | Path,
            data: xr.Dataset | None = None,
            file_format: str = "NETCDF4",
            vardim_list: str | list[str] | None = None,
            data_arrays: xr.DataArray | list[xr.DataArray] | None = None,
            dimlists: list[str] | list[list[str]] | None = None,
            dim_dict_list: dict | list[dict] | None = None,
            attrs_dict_list: dict | list[dict] | None = None,
            global_attrs_dict: dict[str, str | int | float] | None = None) -> None:
    """
    Save data to a NetCDF file. Can handle either a fully constructed
    xarray.Dataset or build a new dataset from components.

    Parameters
    ----------
    file_name : str | Path
        The name of the resulting NetCDF file.
        The '.nc' extension will be added automatically if not present.
    data : xarray.Dataset, optional
        An xarray Dataset, i.e. the pre-existing one, that will be directly saved.
    file_format : {"NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"}, default "NETCDF4"
        File format for the resulting netCDF file.
    vardim_list : str | list[str], optional
        List of variable-dimension names for building the dataset.
    data_arrays : xarray.DataArray | list[xr.DataArray], optional
        Data arrays for building the dataset if `data` is not provided.
    dimlists : list[str] | list[list[str]], optional
        List of dimension names for each variable in the dataset.
    dim_dict_list : dict | list[dict], optional
        List of dictionaries containing dimension information for each variable.
    attrs_dict_list : dict | list[dict], optional
        List of attribute dictionaries for each variable in the dataset.
    global_attrs_dict : dict, optional
        Dictionary for global attributes to assign to the dataset.

    Returns
    -------
    None
        Saves a NetCDF file and prints success confirmation.

    Raises
    ------
    TypeError
        If parameters have incorrect types.
    ValueError
        If file_name is empty, file_format is invalid, or parameter combinations are invalid.
    RuntimeError
        If dataset creation or file saving fails.

    Notes
    -----
    - If `data` is provided, the function directly saves it as a NetCDF file.
    - If `data` is not provided, the function will construct a dataset using the
      `vardim_list`, `data_arrays`, `dimlists`, etc.
    - NOTE(review): `file_format` is validated but the save helper currently
      always writes NETCDF4 — confirm whether forwarding the chosen format is
      intended (TODO).
    """
    # Parameter validation
    if not isinstance(file_name, (str, Path)) or not str(file_name).strip():
        raise ValueError("file_name must be a non-empty string or Path")

    if not isinstance(file_format, str) or file_format not in NC_FILE_FORMATS:
        raise ValueError(f"Unsupported file format '{file_format}'. "
                         f"Choose one from {NC_FILE_FORMATS}.")

    if data is not None and not isinstance(data, xr.Dataset):
        raise TypeError("data must be an xarray.Dataset or None")

    if global_attrs_dict is not None and not isinstance(global_attrs_dict, dict):
        raise TypeError("global_attrs_dict must be a dictionary or None")

    def _as_flat_list(arg):
        # Wrap scalars in a list, then flatten any nesting (defensive programming).
        return flatten_list(_ensure_list(arg))

    # Normalise the optional component arguments to flat lists
    if vardim_list is not None:
        vardim_list = _as_flat_list(vardim_list)

    if data_arrays is not None:
        data_arrays = _as_flat_list(data_arrays)

    if dimlists is not None:
        dimlists = _as_flat_list(dimlists)

    if dim_dict_list is not None:
        dim_dict_list = _as_flat_list(dim_dict_list)

    if attrs_dict_list is not None:
        attrs_dict_list = _as_flat_list(attrs_dict_list)

    # Check if dataset exists
    if data is not None:
        # Call helper if dataset is already created
        _save_ds_as_nc(data, file_name, global_attrs_dict)

    else:
        # Validate required parameters for dataset construction
        if not all([vardim_list, data_arrays, dimlists, dim_dict_list, attrs_dict_list]):
            raise ValueError("When data is None, all of vardim_list, data_arrays, "
                             "dimlists, dim_dict_list, and attrs_dict_list must be provided")

        # All component lists must be parallel: one entry per variable
        param_lengths = [len(vardim_list), len(data_arrays), len(dimlists),
                         len(dim_dict_list), len(attrs_dict_list)]
        if not all(length == param_lengths[0] for length in param_lengths):
            raise ValueError("All parameter lists must have the same length")

        # Build dataset from components
        try:
            ds = xr.Dataset()
            for vardim, data_array, dimlist, dim_dict, attrs_dict in zip(
                vardim_list, data_arrays, dimlists, dim_dict_list, attrs_dict_list
            ):

                data_array_dict = create_ds_component(vardim,
                                                      data_array,
                                                      dimlist,
                                                      dim_dict,
                                                      attrs_dict)
                ds = ds.merge(data_array_dict)
        except Exception as e:
            # Chain the cause so the failing component is visible in the traceback.
            raise RuntimeError(f"Failed to build dataset from components: {e}") from e

        # Add netCDF file extension ('.nc') if not present
        if get_obj_specs(str(file_name), "ext") != f".{EXTENSIONS[0]}":
            file_name = append_ext(str(file_name), EXTENSIONS[0])

        # Save to file
        _save_ds_as_nc(ds, file_name, global_attrs_dict)
        print(f"{file_name} file successfully created")
|
|
238
|
+
|
|
239
|
+
# CSV files #
|
|
240
|
+
#~~~~~~~~~~~#
|
|
241
|
+
|
|
242
|
+
def save_nc_as_csv(nc_file: str | xr.Dataset | xr.DataArray | Path,
                   columns_to_drop: str | list[str] | None = None,
                   separator: str = ",",
                   save_index: bool = False,
                   save_header: bool = True,
                   csv_file_name: str | Path | None = None,
                   date_format: str | None = None,
                   approximate_coords: bool = False,
                   latitude_point: float | None = None,
                   longitude_point: float | None = None) -> None:
    """
    Save netCDF data into a CSV file. The function handles
    3D data variables (typically dependent on time, latitude, longitude)
    and speeds up further data processes.

    Parameters
    ----------
    nc_file : str | Path | xarray.Dataset | xarray.DataArray
        String of the xarray data set file path or the already opened dataset or data array.
    columns_to_drop : str | list[str], optional
        Names of columns to drop. Use "coords" to drop coordinate variables.
    separator : str, default ','
        Separator used in the CSV file.
    save_index : bool, default False
        Whether to include an index column in the CSV.
    save_header : bool, default True
        Whether to include a header row in the CSV.
    csv_file_name : str | Path, optional
        Name of the output CSV file. If None, extracts from nc_file name.
    date_format : str, optional
        Date format to apply if the dataset contains time data.
    approximate_coords : bool, default False
        If True, approximates the nearest latitude/longitude points.
    latitude_point : float, optional
        Latitude point for approximation.
    longitude_point : float, optional
        Longitude point for approximation.

    Returns
    -------
    None
        Saves a CSV file and prints success confirmation.

    Raises
    ------
    TypeError
        If parameters have incorrect types.
    ValueError
        If required parameters are missing or invalid, or coordinate selection is invalid.
    FileNotFoundError
        If the input file doesn't exist.
    RuntimeError
        If coordinate approximation or file operations fail.
    """
    # Parameter validation
    if not isinstance(nc_file, (str, xr.Dataset, xr.DataArray, Path)):
        raise TypeError("nc_file must be a string, Path, xarray.Dataset, or xarray.DataArray")

    if isinstance(nc_file, (str, Path)) and not str(nc_file).strip():
        raise ValueError("File path cannot be empty")

    if columns_to_drop is not None:
        if isinstance(columns_to_drop, str):
            # The sentinel "coords" is kept as a plain string; any other name is listified.
            columns_to_drop = [columns_to_drop] if columns_to_drop != "coords" else columns_to_drop
        elif isinstance(columns_to_drop, list):
            columns_to_drop = flatten_list(columns_to_drop)
            if not all(isinstance(col, str) for col in columns_to_drop):
                raise TypeError("All items in columns_to_drop must be strings")
        else:
            raise TypeError("columns_to_drop must be a string, list of strings, or None")

    if not isinstance(separator, str) or not separator:
        raise ValueError("separator must be a non-empty string")

    if not isinstance(save_index, bool):
        raise TypeError("save_index must be a boolean")

    if not isinstance(save_header, bool):
        raise TypeError("save_header must be a boolean")

    if csv_file_name is not None and not isinstance(csv_file_name, (str, Path)):
        raise TypeError("csv_file_name must be a string, Path, or None")

    if csv_file_name is not None and not str(csv_file_name).strip():
        raise ValueError("csv_file_name cannot be empty")

    if not isinstance(approximate_coords, bool):
        raise TypeError("approximate_coords must be a boolean")

    if latitude_point is not None and not isinstance(latitude_point, (int, float)):
        raise TypeError("latitude_point must be a number or None")

    if longitude_point is not None and not isinstance(longitude_point, (int, float)):
        raise TypeError("longitude_point must be a number or None")

    # Open netCDF data file if passed a string or Path
    if isinstance(nc_file, (str, Path)):
        nc_path = Path(nc_file)
        if not nc_path.exists():
            raise FileNotFoundError(f"NetCDF file not found: {nc_path}")

        print(f"Opening {nc_file}...")
        try:
            ds = xr.open_dataset(nc_file)
        except Exception as e:
            raise RuntimeError(f"Failed to open netCDF file: {e}") from e
    else:
        ds = nc_file.copy()

    try:
        if latitude_point is not None or longitude_point is not None:
            if latitude_point is None or longitude_point is None:
                raise ValueError("Both latitude_point and longitude_point must be provided when using coordinate selection")

            coord_varlist = find_coordinate_variables(ds)
            lats = ds[coord_varlist[0]]
            lons = ds[coord_varlist[1]]

            if len(lats) == len(lons) == 1:
                raise ValueError("Object is already point data")

            # Approximate or select coordinates
            try:
                if approximate_coords:
                    # Nearest-neighbour selection via index of the minimum absolute distance
                    lat_idx = abs(lats - latitude_point).argmin()
                    lon_idx = abs(lons - longitude_point).argmin()
                    coord_idx_kw = {coord_varlist[0]: lat_idx, coord_varlist[1]: lon_idx}
                    ds = ds.isel(**coord_idx_kw)
                else:
                    coord_idx_kw = {coord_varlist[0]: latitude_point, coord_varlist[1]: longitude_point}
                    ds = ds.sel(**coord_idx_kw)
            except Exception as e:
                raise RuntimeError(f"Failed to select coordinates: {e}") from e

        # Drop columns if needed
        try:
            if columns_to_drop is None:
                data_frame = ds.to_dataframe().reset_index(drop=False)
            elif columns_to_drop == "coords":
                coord_varlist = find_coordinate_variables(ds)
                data_frame = ds.to_dataframe().reset_index(drop=False).drop(columns=coord_varlist)
            else:
                data_frame = ds.to_dataframe().reset_index(drop=False).drop(columns=columns_to_drop)
        except Exception as e:
            raise RuntimeError(f"Failed to process DataFrame: {e}") from e

        # Create CSV file name
        if isinstance(nc_file, (str, Path)) and not csv_file_name:
            # Replace only the final extension; str.split(".")[0] would truncate
            # paths containing dots elsewhere (e.g. "./dir/file.nc" -> ".csv").
            csv_file_name = str(Path(nc_file).with_suffix(".csv"))
        elif not isinstance(nc_file, (str, Path)) and not csv_file_name:
            raise ValueError("You must provide a CSV file name when input is not a file path.")

        # Save to CSV
        save2csv(str(csv_file_name), data_frame, separator, save_index, save_header, date_format)

    except (ValueError, RuntimeError):
        # Propagate documented errors unchanged rather than matching on message text,
        # so ValueError reaches callers as the docstring promises.
        raise
    except Exception as e:
        raise RuntimeError(f"Unexpected error during CSV conversion: {e}") from e
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def save_da_as_csv(data_array: xr.DataArray,
                   separator: str = ",",
                   save_index: bool = False,
                   save_header: bool = True,
                   csv_file_name: str | Path | None = None,
                   new_columns: str | list[str] | None = None,
                   date_format: str | None = None) -> None:
    """
    Save a xarray.DataArray object to a CSV file. Data variables may
    originally be 3D, typically depending on (time, latitude, longitude).

    Parameters
    ----------
    data_array : xarray.DataArray
        DataArray object to save.
    separator : str, default ','
        Separator for the CSV.
    save_index : bool, default False
        Whether to include an index column in the CSV.
    save_header : bool, default True
        Whether to include a header row in the CSV.
    csv_file_name : str | Path, optional
        Name for the CSV file.
    new_columns : str | list[str], optional
        Names for the new columns in the output CSV. Default uses 'time' and variable name.
    date_format : str, optional
        Date format for time data, if present.

    Returns
    -------
    None
        Saves a CSV file and prints success confirmation.

    Raises
    ------
    TypeError
        If parameters have incorrect types.
    ValueError
        If required parameters are missing or invalid.
    RuntimeError
        If DataFrame processing or file operations fail.
    """
    # Parameter validation
    if not isinstance(data_array, xr.DataArray):
        raise TypeError("data_array must be an xarray.DataArray")

    if not isinstance(separator, str) or not separator:
        raise ValueError("separator must be a non-empty string")

    if not isinstance(save_index, bool):
        raise TypeError("save_index must be a boolean")

    if not isinstance(save_header, bool):
        raise TypeError("save_header must be a boolean")

    if csv_file_name is None:
        raise ValueError("You must provide a CSV file name.")

    if not isinstance(csv_file_name, (str, Path)) or not str(csv_file_name).strip():
        raise ValueError("csv_file_name must be a non-empty string or Path")

    if new_columns is not None:
        if isinstance(new_columns, str):
            new_columns = [new_columns]
        elif isinstance(new_columns, list):
            new_columns = flatten_list(new_columns)
            if not all(isinstance(col, str) and col.strip() for col in new_columns):
                raise ValueError("All column names must be non-empty strings")
        else:
            raise TypeError("new_columns must be a string, list of strings, or None")

    try:
        # Convert to pandas DataFrame
        data_frame = data_array.to_dataframe().reset_index(drop=False)

        # Default column names: the datetime dimension plus the variable name
        if not new_columns:
            date_key = find_dt_key(data_array)
            new_columns = [date_key, data_array.name if data_array.name else "value"]

        if len(new_columns) != len(data_frame.columns):
            raise ValueError(f"Number of new column names ({len(new_columns)}) "
                             f"must match number of DataFrame columns ({len(data_frame.columns)})")

        data_frame.columns = new_columns

        # Save to CSV
        save2csv(str(csv_file_name), data_frame, separator, save_index, save_header, date_format)

    except ValueError:
        # Propagate the documented length-mismatch error unchanged rather than
        # matching on message text.
        raise
    except Exception as e:
        raise RuntimeError(f"Failed to process DataArray or save to CSV: {e}") from e
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
# Helpers #
|
|
501
|
+
#---------#
|
|
502
|
+
|
|
503
|
+
# Helper function to save an existing dataset with optional attribute updates
|
|
504
|
+
def _save_ds_as_nc(xarray_ds: xr.Dataset, file_name: str | Path, attrs_dict: dict | None = None,
                   file_format: str = "NETCDF4") -> None:
    """
    Helper function to save an xarray Dataset to netCDF format.

    Parameters
    ----------
    xarray_ds : xarray.Dataset
        Dataset to save.
    file_name : str | Path
        Output file name.
    attrs_dict : dict, optional
        Global attributes to add to the dataset.
    file_format : str, default "NETCDF4"
        NetCDF format passed to ``Dataset.to_netcdf``. Added as a
        backward-compatible parameter so callers can choose a format
        instead of always writing NETCDF4.

    Raises
    ------
    TypeError
        If parameters have incorrect types.
    ValueError
        If file_name is empty.
    RuntimeError
        If file saving fails.
    """
    if not isinstance(xarray_ds, xr.Dataset):
        raise TypeError("xarray_ds must be an xarray.Dataset")

    if not isinstance(file_name, (str, Path)) or not str(file_name).strip():
        raise ValueError("file_name must be a non-empty string or Path")

    if attrs_dict is not None and not isinstance(attrs_dict, dict):
        raise TypeError("attrs_dict must be a dictionary or None")

    if attrs_dict:
        # NOTE: replaces (not merges) any existing global attributes on the dataset.
        xarray_ds.attrs = attrs_dict

    # Add netCDF file extension ('.nc') if not present
    file_name_str = str(file_name)
    if get_obj_specs(file_name_str, "ext") != ".nc":
        file_name_str += ".nc"

    # Save to file
    try:
        xarray_ds.to_netcdf(file_name_str, mode="w", format=file_format)
        print(f"{file_name_str} has been successfully created")
    except Exception as e:
        raise RuntimeError(f"Failed to save netCDF file: {e}") from e
|
|
549
|
+
|
|
550
|
+
def _ensure_list(arg: Any) -> list:
|
|
551
|
+
"""
|
|
552
|
+
Helper function to ensure argument is a list.
|
|
553
|
+
|
|
554
|
+
Parameters
|
|
555
|
+
----------
|
|
556
|
+
arg : Any
|
|
557
|
+
Argument to convert to list if not already a list.
|
|
558
|
+
|
|
559
|
+
Returns
|
|
560
|
+
-------
|
|
561
|
+
list
|
|
562
|
+
The argument as a list.
|
|
563
|
+
"""
|
|
564
|
+
return arg if isinstance(arg, list) else [arg]
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
#--------------------------#
|
|
568
|
+
# Parameters and constants #
|
|
569
|
+
#--------------------------#
|
|
570
|
+
|
|
571
|
+
# File extensions #
# Index 0 ("nc") is the netCDF extension appended by save2nc/_save_ds_as_nc;
# index 1 ("csv") corresponds to the CSV export helpers.
EXTENSIONS = ["nc", "csv"]

# Valid netCDF file formats #
# Accepted values for save2nc's `file_format` parameter (the set of formats
# understood by xarray's `Dataset.to_netcdf`).
NC_FILE_FORMATS = ["NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: climarraykit
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Climate-focused xarray toolkit for NetCDF/GRIB handling, coordinate discovery, and dataset serialisation
|
|
5
|
+
Author-email: Jon Ander Gabantxo <jagabantxo@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 climarraykit
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/EusDancerDev/climarraykit
|
|
29
|
+
Project-URL: Documentation, https://github.com/EusDancerDev/climarraykit#readme
|
|
30
|
+
Project-URL: Repository, https://github.com/EusDancerDev/climarraykit.git
|
|
31
|
+
Project-URL: Bug Reports, https://github.com/EusDancerDev/climarraykit/issues
|
|
32
|
+
Keywords: xarray,climate,netcdf,grib,meteorology,climatology
|
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
|
34
|
+
Classifier: Intended Audience :: Science/Research
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
39
|
+
Classifier: Operating System :: OS Independent
|
|
40
|
+
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
41
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
42
|
+
Requires-Python: >=3.10
|
|
43
|
+
Description-Content-Type: text/markdown
|
|
44
|
+
License-File: LICENSE
|
|
45
|
+
Requires-Dist: numpy>=2.2.3
|
|
46
|
+
Requires-Dist: pandas>=2.2.3
|
|
47
|
+
Requires-Dist: xarray>=2024.2.0
|
|
48
|
+
Requires-Dist: filewise>=3.13.0
|
|
49
|
+
Requires-Dist: pygenutils>=16.4.0
|
|
50
|
+
Requires-Dist: paramlib>=3.4.10
|
|
51
|
+
Provides-Extra: grib
|
|
52
|
+
Requires-Dist: cfgrib>=0.9.10; extra == "grib"
|
|
53
|
+
Provides-Extra: regrid
|
|
54
|
+
Requires-Dist: xesmf>=0.8.0; extra == "regrid"
|
|
55
|
+
Provides-Extra: dev
|
|
56
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
57
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
58
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
59
|
+
Requires-Dist: isort>=5.0.0; extra == "dev"
|
|
60
|
+
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
|
61
|
+
Dynamic: license-file
|
|
62
|
+
|
|
63
|
+
# climarraykit
|
|
64
|
+
|
|
65
|
+
`climarraykit` is a dedicated xarray-centric toolkit for climate and weather data workflows.
|
|
66
|
+
|
|
67
|
+
It is introduced to decouple heavy xarray/netCDF functionality from broader utility packages
|
|
68
|
+
where those dependencies are not always needed.
|
|
69
|
+
|
|
70
|
+
## Scope
|
|
71
|
+
|
|
72
|
+
- netCDF file scanning and integrity checks
|
|
73
|
+
- coordinate and dimension discovery utilities
|
|
74
|
+
- GRIB-to-netCDF conversion helpers
|
|
75
|
+
- xarray Dataset/DataArray creation and serialisation helpers
|
|
76
|
+
- climate-oriented xarray data manipulation helpers
|
|
77
|
+
|
|
78
|
+
## Transitional compatibility
|
|
79
|
+
|
|
80
|
+
This initial release preserves behaviour by exposing the same module-level APIs as the
|
|
81
|
+
legacy `filewise.xarray_utils` package while enabling downstream projects to migrate imports
|
|
82
|
+
to `climarraykit`.
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
Current version: **0.2.0** (see [CHANGELOG.md](CHANGELOG.md)).
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
climarraykit/__init__.py,sha256=xQvdKC3uqFS4WhKgxK7T-dB8yDRrnTqXzQ_OKKeYnX0,189
|
|
2
|
+
climarraykit/conversions.py,sha256=rVXavDimSZ3uWzlLPG0_Hd8YplaH8l-ghV1e8B_FZj0,8094
|
|
3
|
+
climarraykit/data_manipulation.py,sha256=gS4CSBL2tfiJLTz_LCtCfrQkO5WgBMBt1xtRnDBRjek,14035
|
|
4
|
+
climarraykit/file_utils.py,sha256=ML-aDtl04Fmo-v6wZBdke-rHKpInrsOL-tvVeKyRmVE,9687
|
|
5
|
+
climarraykit/patterns.py,sha256=O59gZvF2JxnsIPcZ6E5014D4WdnFY_hWARbvhXSDi5A,21344
|
|
6
|
+
climarraykit/xarray_obj_handler.py,sha256=rCeX1ReA1dUR4i1vApXfw5Li72D3uBTB-VQOVxWgDSg,22386
|
|
7
|
+
climarraykit-0.2.0.dist-info/licenses/LICENSE,sha256=vkQej5GYK0WzkANVWBuaw_h2ap6amFnA1OoGef8sUds,1069
|
|
8
|
+
climarraykit-0.2.0.dist-info/METADATA,sha256=DtNAtC_3CkNy5HfKgirqvph6ZY2g1a1-V9VhPqOXeV4,3796
|
|
9
|
+
climarraykit-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
climarraykit-0.2.0.dist-info/top_level.txt,sha256=wONDIV9uoNEYo0-K5nRCMTQ6VCpD2nCtVpj5p2ocHNA,13
|
|
11
|
+
climarraykit-0.2.0.dist-info/RECORD,,
|