climarraykit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climarraykit/__init__.py +12 -0
- climarraykit/conversions.py +208 -0
- climarraykit/data_manipulation.py +386 -0
- climarraykit/file_utils.py +309 -0
- climarraykit/patterns.py +616 -0
- climarraykit/xarray_obj_handler.py +575 -0
- climarraykit-0.2.0.dist-info/METADATA +86 -0
- climarraykit-0.2.0.dist-info/RECORD +11 -0
- climarraykit-0.2.0.dist-info/WHEEL +5 -0
- climarraykit-0.2.0.dist-info/licenses/LICENSE +21 -0
- climarraykit-0.2.0.dist-info/top_level.txt +1 -0
climarraykit/conversions.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
#----------------#
|
|
5
|
+
# Import modules #
|
|
6
|
+
#----------------#
|
|
7
|
+
|
|
8
|
+
import xarray as xr
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
#------------------------#
|
|
12
|
+
# Import project modules #
|
|
13
|
+
#------------------------#
|
|
14
|
+
|
|
15
|
+
from climarraykit.xarray_obj_handler import _save_ds_as_nc
|
|
16
|
+
from paramlib.global_parameters import CLIMATE_FILE_EXTENSIONS
|
|
17
|
+
from pygenutils.arrays_and_lists.conversions import flatten_to_string
|
|
18
|
+
from pygenutils.arrays_and_lists.data_manipulation import flatten_list
|
|
19
|
+
from pygenutils.operative_systems.os_operations import exit_info, run_system_command
|
|
20
|
+
from pygenutils.strings.string_handler import (
|
|
21
|
+
find_substring_index,
|
|
22
|
+
get_obj_specs,
|
|
23
|
+
modify_obj_specs
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
#------------------#
|
|
27
|
+
# Define functions #
|
|
28
|
+
#------------------#
|
|
29
|
+
|
|
30
|
+
# Xarray objects #
|
|
31
|
+
#----------------#
|
|
32
|
+
|
|
33
|
+
def grib2nc(
    grib_file_list: str | list[str],
    on_shell: bool = False,
    option_str: str | None = None,
    capture_output: bool = False,
    return_output_name: bool = False,
    encoding: str = "utf-8",
    shell: bool = True) -> None:
    """
    Converts a GRIB file or list of GRIB files to netCDF format. The conversion
    can be executed either via shell commands or programmatically using xarray.

    Parameters
    ----------
    grib_file_list : str | list[str]
        The file path(s) of the GRIB file(s) to be converted.
    on_shell : bool, optional
        If True, the conversion will be handled through shell commands using
        the 'grib_to_netcdf' tool. If False, the conversion will be done
        programmatically using xarray.
    option_str : str, optional
        Additional options to pass to the shell command for 'grib_to_netcdf'.
        This parameter is only used if 'on_shell' is set to True.
    capture_output : bool, optional
        Whether to capture the command output. Default is False.
    return_output_name : bool, optional
        Whether to return file descriptor names. Default is False.
    encoding : str, optional
        Encoding to use when decoding command output. Default is "utf-8".
    shell : bool, optional
        Whether to execute the command through the shell. Default is True.

    Returns
    -------
    None
        Converts the GRIB file(s) to netCDF format and saves the output
        netCDF file(s) in the same directory as the GRIB files.

    Raises
    ------
    TypeError
        If grib_file_list is not str or list of str.
    ValueError
        If any GRIB file path is invalid or empty.
    FileNotFoundError
        If any GRIB file doesn't exist.
    RuntimeError
        If the shell-based conversion command fails.

    Notes
    -----
    - When 'on_shell' is True, the function builds and runs a shell command
      that calls the 'grib_to_netcdf' tool, with optional flags.
    - When 'on_shell' is False, xarray is used to directly open the GRIB file
      and convert it to netCDF format.
    - The function will prompt for input in the case of multiple GRIB files if
      'on_shell' is True.
    """
    # Parameter validation #
    if not isinstance(grib_file_list, (str, list)):
        raise TypeError("grib_file_list must be a string or list of strings")

    if isinstance(grib_file_list, list):
        # Flatten nested lists for defensive programming
        grib_file_list = flatten_list(grib_file_list)

        # Validate all items are non-empty strings
        if not all(isinstance(item, str) for item in grib_file_list):
            raise TypeError("All items in grib_file_list must be strings")
        if not all(item.strip() for item in grib_file_list):
            raise ValueError("All GRIB file paths must be non-empty strings")
    else:
        # Single string validation (type already checked above)
        if not grib_file_list.strip():
            raise ValueError("GRIB file path must be a non-empty string")

    # Check file existence and warn on unexpected extensions
    files_to_check = [grib_file_list] if isinstance(grib_file_list, str) else grib_file_list
    for grib_file in files_to_check:
        if not Path(grib_file).exists():
            raise FileNotFoundError(f"GRIB file not found: {grib_file}")

        if not any(grib_file.lower().endswith(ext.lower()) for ext in ['.grib', '.grb', '.grib2', '.grb2']):
            print(f"Warning: File {grib_file} may not be a GRIB file based on extension")

    # Shell-based conversion #
    #-#-#-#-#-#-#-#-#-#-#-#-#

    if on_shell:
        # Handle single GRIB file
        if isinstance(grib_file_list, str):
            # BUGFIX: the original code never assigned 'grib_allfile_info_str'
            # in this branch, so building the shell command below raised a
            # NameError. A single path is its own command-argument string.
            grib_allfile_info_str = grib_file_list
            nc_file_new = modify_obj_specs(grib_file_list, "ext", EXTENSIONS[0])

        # Handle list of GRIB files
        else:
            grib_allfile_info_str = flatten_to_string(grib_file_list)

            # Prompt user for the netCDF file name without extension
            nc_file_new_noext = input("Please introduce a name "
                                      "for the netCDF file, "
                                      "WITHOUT THE EXTENSION: ")

            # Validate the file name using RegEx
            allowed_minimum_char_idx = find_substring_index(nc_file_new_noext,
                                                            REGEX_GRIB2NC,
                                                            advanced_search=True)

            while allowed_minimum_char_idx == -1:
                print("Invalid file name.\nIt can contain alphanumeric characters, "
                      "as well as the following non-word characters: {. _ -}")
                nc_file_new_noext = input("Please introduce a valid name: ")
                allowed_minimum_char_idx = find_substring_index(nc_file_new_noext,
                                                                REGEX_GRIB2NC,
                                                                advanced_search=True)

            # Modify the file name to have the .nc extension
            nc_file_new = modify_obj_specs(nc_file_new_noext,
                                           obj2modify="ext",
                                           new_obj=EXTENSIONS[0])

        # Construct the shell command for conversion
        grib2nc_template = "grib_to_netcdf "
        if option_str:
            grib2nc_template += f"{option_str} "
        grib2nc_template += f"-o {nc_file_new} {grib_allfile_info_str}"

        # Execute the shell command
        try:
            process_exit_info = run_system_command(
                grib2nc_template,
                capture_output=capture_output,
                return_output_name=return_output_name,
                encoding=encoding,
                shell=shell
            )
            # Call exit_info with parameters based on capture_output
            exit_info(
                process_exit_info,
                check_stdout=True,
                check_stderr=True,
                check_return_code=True
            )
        except Exception as e:
            # Chain the original exception for easier debugging
            raise RuntimeError(f"Shell command execution failed: {e}") from e

    # Programmatic conversion #
    #-#-#-#-#-#-#-#-#-#-#-#-#-#

    else:
        # Ensure grib_file_list is a list
        if isinstance(grib_file_list, str):
            grib_file_list = [grib_file_list]

        # Convert each GRIB file in the list to netCDF
        for grib_file in grib_file_list:
            try:
                # NOTE(review): assumes get_obj_specs strips the extension
                # to build the output name — confirm against pygenutils.
                grib_file_noext = get_obj_specs(grib_file, "name_noext", EXTENSIONS[0])
                ds = xr.open_dataset(grib_file, engine="cfgrib")
                _save_ds_as_nc(ds, grib_file_noext)
                print(f"Successfully converted {grib_file} to netCDF format")
            except Exception as e:
                print(f"Error converting {grib_file}: {e}")
                raise
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
#--------------------------#
# Parameters and constants #
#--------------------------#

# Valid file extensions #
# NOTE(review): takes every 3rd entry of CLIMATE_FILE_EXTENSIONS; the code
# below uses EXTENSIONS[0] as the netCDF extension — confirm that the first
# selected entry is indeed "nc" in paramlib.global_parameters.
EXTENSIONS = CLIMATE_FILE_EXTENSIONS[::3]
|
|
206
|
+
|
|
207
|
+
# RegEx control for GRIB-to-netCDF single file name #
# FIX: the original pattern r"^[a-zA-Z\d\._-]$" had no quantifier, so it only
# matched single-character names and rejected every realistic file name.
# '+' allows one or more of the permitted characters (alphanumerics, . _ -).
REGEX_GRIB2NC = r"^[a-zA-Z\d\._-]+$"
|
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
#! /usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
#----------------#
|
|
5
|
+
# Import modules #
|
|
6
|
+
#----------------#
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
#------------------------#
|
|
12
|
+
# Import project modules #
|
|
13
|
+
#------------------------#
|
|
14
|
+
|
|
15
|
+
from filewise.file_operations.ops_handler import move_files
|
|
16
|
+
from filewise.file_operations.path_utils import find_dirs_with_files, find_files
|
|
17
|
+
from climarraykit.file_utils import ncfile_integrity_status
|
|
18
|
+
from climarraykit.patterns import (
|
|
19
|
+
find_coordinate_variables,
|
|
20
|
+
get_latlon_bounds,
|
|
21
|
+
get_latlon_deltas,
|
|
22
|
+
get_times
|
|
23
|
+
)
|
|
24
|
+
from paramlib.global_parameters import CLIMATE_FILE_EXTENSIONS
|
|
25
|
+
from pygenutils.strings.text_formatters import format_string, string_underliner
|
|
26
|
+
from pygenutils.time_handling.date_and_time_utils import find_dt_key
|
|
27
|
+
|
|
28
|
+
#-------------------------#
|
|
29
|
+
# Define custom functions #
|
|
30
|
+
#-------------------------#
|
|
31
|
+
|
|
32
|
+
# Data extractors #
|
|
33
|
+
#-----------------#
|
|
34
|
+
|
|
35
|
+
def extract_latlon_bounds(delta_decimal_places: int, value_decimal_places: int) -> None:
    """
    Extract latitude and longitude bounds from netCDF files.

    Parameters
    ----------
    delta_decimal_places : int
        Number of decimal places to round off the delta between latitude and longitude points.
    value_decimal_places : int
        Number of decimal places to round off the latitude and longitude values.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If decimal places parameters are not integers.
    ValueError
        If decimal places parameters are negative.

    Notes
    -----
    - The extracted latitude and longitude arrays, their dimensions,
      and deltas are saved in a report file.
    - If any files are faulty or cannot be processed, relevant error information
      is recorded in the report.
    """
    # Parameter validation
    if not isinstance(delta_decimal_places, int):
        raise TypeError("delta_decimal_places must be an integer")
    if not isinstance(value_decimal_places, int):
        raise TypeError("value_decimal_places must be an integer")
    if delta_decimal_places < 0:
        raise ValueError("delta_decimal_places must be non-negative")
    if value_decimal_places < 0:
        raise ValueError("value_decimal_places must be non-negative")

    # Every directory under the caller's cwd that contains netCDF files
    nc_dirs = find_dirs_with_files(EXTENSIONS[0], search_path=CODE_CALL_DIR)

    for dir_num, dir_name in enumerate(nc_dirs, start=1):
        nc_files = find_files(EXTENSIONS[0], dir_name, match_type="ext", top_path_only=True)

        with open(COORD_INFO_FNAME, "w") as report:
            if nc_files:
                for file_num, nc_file in enumerate(nc_files, start=1):
                    print(f"Processing file {file_num} out of {len(nc_files)} "
                          f"in directory {dir_num} out of {len(nc_dirs)}...")
                    # NOTE(review): the directory header is written once per
                    # file, not once per directory — confirm this is intended.
                    report.write(format_string(string_underliner(DIR_INFO_TEMPLATE, dir_name), "+"))

                    # Skip files that fail the integrity check
                    try:
                        ncfile_integrity_status(nc_file)
                    except Exception as ncf_err:
                        report.write(f"FAULTY FILE '{nc_file}': {ncf_err}\n")
                        continue

                    # Skip files whose coordinate variables cannot be located
                    try:
                        coord_vars = find_coordinate_variables(nc_file)
                    except Exception as coord_err:
                        report.write(f"ERROR IN FILE '{nc_file}': {coord_err}\n")
                        continue

                    try:
                        lats, lons = get_latlon_bounds(nc_file, coord_vars[0], coord_vars[1], value_decimal_places)
                        lat_delta, lon_delta = get_latlon_deltas(lats, lons, delta_decimal_places)

                        format_args_latlon_bounds = (
                            nc_file,
                            lats,
                            lons,
                            len(lats),
                            len(lons),
                            lat_delta,
                            lon_delta
                        )
                        report.write(format_string(LATLON_INFO_TEMPLATE, format_args_latlon_bounds))
                    except Exception as e:
                        report.write(f"ERROR PROCESSING COORDINATES IN FILE '{nc_file}': {e}\n")
            else:
                report.write(f"No netCDF files in directory {dir_name}\n")

        # FIX: the original called move_files (identically in both branches)
        # while the report file was still open inside the 'with' block, so the
        # file could be moved before its buffers were flushed and closed.
        # Moving it once here, after the 'with' exits, is safe and deduplicated.
        move_files(COORD_INFO_FNAME,
                   input_directories=".",
                   destination_directories=dir_name,
                   match_type="glob")
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def extract_time_bounds() -> None:
    """
    Extract the time bounds (start and end times) from netCDF files.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Notes
    -----
    - The time range (start and end times) and the total number of time records
      are saved in a report file.
    - If any files are faulty or cannot be processed, relevant error information
      is recorded in the report.
    """
    # Every directory under the caller's cwd that contains netCDF files
    nc_dirs = find_dirs_with_files(EXTENSIONS[0], search_path=CODE_CALL_DIR)

    for dir_num, dir_name in enumerate(nc_dirs, start=1):
        nc_files = find_files(EXTENSIONS[0], dir_name, match_type="ext", top_path_only=True)

        with open(DATE_RANGE_INFO_FNAME, "w") as report:
            if nc_files:
                for file_num, nc_file in enumerate(nc_files, start=1):
                    print(f"Processing file {file_num} out of {len(nc_files)} "
                          f"in directory {dir_num} out of {len(nc_dirs)}...")
                    # NOTE(review): the directory header is written once per
                    # file, not once per directory — confirm this is intended.
                    report.write(format_string(string_underliner(DIR_INFO_TEMPLATE, dir_name), "+"))

                    # Skip files that fail the integrity check
                    try:
                        ncfile_integrity_status(nc_file)
                    except Exception as ncf_err:
                        report.write(f"FAULTY FILE '{nc_file}': {ncf_err}\n")
                        continue

                    # Skip files whose datetime key cannot be located
                    try:
                        time_var = find_dt_key(nc_file)
                    except Exception as time_err:
                        report.write(f"ERROR IN FILE '{nc_file}': {time_err}\n")
                        continue

                    try:
                        times = get_times(nc_file, time_var)
                        format_args_time_periods = (
                            nc_file,
                            times[0].values,
                            times[-1].values,
                            len(times)
                        )
                        report.write(format_string(PERIOD_INFO_TEMPLATE, format_args_time_periods))
                    except Exception as e:
                        report.write(f"ERROR PROCESSING TIME DATA IN FILE '{nc_file}': {e}\n")
            else:
                report.write(f"No netCDF files in directory {dir_name}\n")

        # FIX: the original called move_files (identically in both branches)
        # while the report file was still open inside the 'with' block, so the
        # file could be moved before its buffers were flushed and closed.
        # Moving it once here, after the 'with' exits, is safe and deduplicated.
        move_files(DATE_RANGE_INFO_FNAME,
                   input_directories=".",
                   destination_directories=dir_name,
                   match_type="glob")
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def extract_time_formats() -> None:
    """
    Extract the time formats from netCDF files.

    Parameters
    ----------
    None

    Returns
    -------
    None

    Notes
    -----
    - The extracted time formats and the total number of time records are saved
      in a report file.
    - If any files are faulty or cannot be processed, relevant error information
      is recorded in the report.
    """
    # Every directory under the caller's cwd that contains netCDF files
    nc_dirs = find_dirs_with_files(EXTENSIONS[0], search_path=CODE_CALL_DIR)

    for dir_num, dir_name in enumerate(nc_dirs, start=1):
        nc_files = find_files(EXTENSIONS[0], dir_name, match_type="ext", top_path_only=True)

        with open(TIME_FORMATS_FILE_NAME, "w") as report:
            if nc_files:
                for file_num, nc_file in enumerate(nc_files, start=1):
                    print(f"Processing file {file_num} out of {len(nc_files)} "
                          f"in directory {dir_num} out of {len(nc_dirs)}...")
                    # NOTE(review): the directory header is written once per
                    # file, not once per directory — confirm this is intended.
                    report.write(format_string(string_underliner(DIR_INFO_TEMPLATE, dir_name), "+"))

                    # Skip files that fail the integrity check
                    try:
                        ncfile_integrity_status(nc_file)
                    except Exception as ncf_err:
                        report.write(f"FAULTY FILE '{nc_file}': {ncf_err}\n")
                        continue

                    # Skip files whose datetime key cannot be located
                    try:
                        time_var = find_dt_key(nc_file)
                    except Exception as time_err:
                        report.write(f"ERROR IN FILE '{nc_file}': {time_err}\n")
                        continue

                    try:
                        times = get_times(nc_file, time_var)
                        format_args_time_formats = (
                            nc_file,
                            times.values,
                            len(times)
                        )
                        report.write(format_string(TIME_FORMAT_INFO_TEMPLATE, format_args_time_formats))
                    except Exception as e:
                        report.write(f"ERROR PROCESSING TIME FORMATS IN FILE '{nc_file}': {e}\n")
            else:
                report.write(f"No netCDF files in directory {dir_name}\n")

        # FIX: the original called move_files (identically in both branches)
        # while the report file was still open inside the 'with' block, so the
        # file could be moved before its buffers were flushed and closed.
        # Moving it once here, after the 'with' exits, is safe and deduplicated.
        move_files(TIME_FORMATS_FILE_NAME,
                   input_directories=".",
                   destination_directories=dir_name,
                   match_type="glob")
|
|
258
|
+
|
|
259
|
+
# File regridding #
|
|
260
|
+
#-----------------#
|
|
261
|
+
|
|
262
|
+
def netcdf_regridder(ds_in: 'xr.Dataset', ds_image: 'xr.Dataset', regrid_method: str = "bilinear") -> 'xr.Dataset':
    """
    Function that regrids a xarray Dataset to that of the desired Dataset.
    It is similar to CDO but more intuitive and
    easier to understand, supported by Python.

    Parameters
    ----------
    ds_in : xarray.Dataset
        Input xarray data set
    ds_image : xarray.Dataset
        Xarray data set with grid specifications to which apply on ds_in.
    regrid_method : {'bilinear', 'conservative', 'conservative_normed', 'nearest_s2d', 'nearest_d2s', 'patch'}
        Regridding method. Defaults 'bilinear'.

    Returns
    -------
    ds_out : xarray.Dataset
        Output data set regridded according to the grid specs of ds_in.

    Raises
    ------
    TypeError
        If input datasets are not xarray.Dataset objects.
    ValueError
        If regrid_method is not valid.
    ImportError
        If xesmf package is not available.
    RuntimeError
        If regridding operation fails.
    """
    # Parameter validation.
    # xarray is imported locally because it is only needed for the isinstance
    # checks here; a missing install is reported as a clear ImportError.
    try:
        import xarray as xr
        if not isinstance(ds_in, xr.Dataset):
            raise TypeError("ds_in must be an xarray.Dataset")
        if not isinstance(ds_image, xr.Dataset):
            raise TypeError("ds_image must be an xarray.Dataset")
    except ImportError as imp_err:
        # FIX: chain the original ImportError so the real cause is preserved
        raise ImportError("xarray package is required but not available") from imp_err

    if not isinstance(regrid_method, str):
        raise TypeError("regrid_method must be a string")

    if regrid_method not in REGRID_METHOD_LIST:
        raise ValueError("Invalid regridding method.\n"
                         f"Choose one from {REGRID_METHOD_LIST}.")

    # xesmf is an optional heavy dependency; import lazily
    try:
        import xesmf as xe
    except ImportError as imp_err:
        raise ImportError("xesmf package is required for regridding but not available") from imp_err

    try:
        # Build the weight-generating regridder, then apply it to the input
        regridder = xe.Regridder(ds_in, ds_image, regrid_method)
        return regridder(ds_in)
    except Exception as e:
        # FIX: chain the underlying exception instead of discarding its traceback
        raise RuntimeError(f"Regridding operation failed: {e}") from e
|
|
322
|
+
|
|
323
|
+
#--------------------------#
# Parameters and constants #
#--------------------------#

# Directory from where this code is being called #
# NOTE(review): captured once at import time; functions above assume the
# process cwd does not change between import and call — confirm.
CODE_CALL_DIR = os.getcwd()

# File extensions #
# NOTE(review): every 3rd entry of CLIMATE_FILE_EXTENSIONS; EXTENSIONS[0] is
# used as the netCDF extension throughout — confirm against paramlib.
EXTENSIONS = CLIMATE_FILE_EXTENSIONS[::3]

# Main file names #
# Report file names written (and then moved) by the extract_* functions above.
COORD_INFO_FNAME = "latlon_bounds.txt"
DATE_RANGE_INFO_FNAME = "period_bounds.txt"
TIME_FORMATS_FILE_NAME = "time_formats.txt"

# Regridding method options #
# Accepted values for netcdf_regridder's 'regrid_method' argument.
REGRID_METHOD_LIST = [
    "bilinear",
    "conservative",
    "conservative_normed",
    "nearest_s2d",
    "nearest_d2s",
    "patch"
]

# Template strings #
#------------------#

# Main parameter scanning info strings #
LATLON_INFO_TEMPLATE = \
"""=========================================================
·File: {}

·Latitudes:
{}

·Longitudes:
{}

-Latitude-longitude array dimensions = {} x {}
-Latitude-longitude array delta = ({}, {})

"""

PERIOD_INFO_TEMPLATE = \
"""=========================================================
·File: {}
·Time range: {} -- {}
-Range length = {}

"""

TIME_FORMAT_INFO_TEMPLATE = \
"""=========================================================
·File: {}

·Time array:
{}

-Array length = {}
"""

# File scanning progress information strings #
DIR_INFO_TEMPLATE = """\nDirectory: {}"""
|