oafuncs 0.0.97.13__py3-none-any.whl → 0.0.97.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/cprogressbar.py +310 -75
- oafuncs/{oa_tool → _script}/email.py +26 -24
- oafuncs/_script/netcdf_merge.py +83 -330
- oafuncs/_script/netcdf_modify.py +106 -0
- oafuncs/_script/netcdf_write.py +125 -0
- oafuncs/{oa_tool → _script}/parallel.py +5 -3
- oafuncs/oa_cmap.py +59 -112
- oafuncs/oa_data.py +3 -24
- oafuncs/oa_date.py +47 -11
- oafuncs/oa_down/hycom_3hourly.py +4 -53
- oafuncs/oa_draw.py +11 -132
- oafuncs/oa_file.py +1 -23
- oafuncs/oa_nc.py +53 -281
- oafuncs/oa_python.py +77 -87
- oafuncs/oa_sign/meteorological.py +3 -3
- oafuncs/oa_tool.py +83 -0
- {oafuncs-0.0.97.13.dist-info → oafuncs-0.0.97.15.dist-info}/METADATA +1 -1
- {oafuncs-0.0.97.13.dist-info → oafuncs-0.0.97.15.dist-info}/RECORD +21 -20
- oafuncs/_script/auto_optimized_parallel_executor.py +0 -459
- oafuncs/oa_tool/__init__.py +0 -7
- {oafuncs-0.0.97.13.dist-info → oafuncs-0.0.97.15.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.97.13.dist-info → oafuncs-0.0.97.15.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.97.13.dist-info → oafuncs-0.0.97.15.dist-info}/top_level.txt +0 -0
oafuncs/_script/netcdf_merge.py
CHANGED
@@ -1,354 +1,107 @@
-#!/usr/bin/env python
-# coding=utf-8
-"""
-Author: Liu Kun && 16031215@qq.com
-Date: 2025-03-30 11:16:29
-LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2025-03-30 11:16:31
-FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\netcdf_merge.py
-Description:
-EditPlatform: vscode
-ComputerInfo: XPS 15 9510
-SystemInfo: Windows 11
-Python Version: 3.12
-"""
-
-import logging
 import os
-from typing import
-
-import numpy as np
-import xarray as xr
+from typing import List, Optional, Union
 from dask.diagnostics import ProgressBar
+import xarray as xr
+from oafuncs import pbar
 
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-
-def merge(file_list: Union[str, List[str]], var_name: Union[str, List[str], None] = None, dim_name: str = "time", target_filename: str = "merged.nc", chunk_config: Dict = {"time": 1000}, compression: Union[bool, Dict] = True, sanity_check: bool = True, overwrite: bool = True, parallel: bool = True) -> None:
+def merge_nc(file_list: Union[str, List[str]], var_name: Optional[Union[str, List[str]]] = None, dim_name: Optional[str] = None, target_filename: Optional[str] = None) -> None:
     """
-
+    Description:
+        Merge variables from multiple NetCDF files along a specified dimension and write to a new file.
+        If var_name is a string, it is considered a single variable; if it is a list and has only one element, it is also a single variable;
+        If the list has more than one element, it is a multi-variable; if var_name is None, all variables are merged.
 
     Parameters:
-        file_list: List of file paths or single file path
-        var_name:
-        dim_name: Dimension
-        target_filename:
-        chunk_config: Dask chunking configuration, e.g. {"time": 1000}
-        compression: Compression configuration (True enables default compression, or custom encoding dictionary)
-        sanity_check: Whether to perform data integrity validation
-        overwrite: Whether to overwrite existing files
-        parallel: Whether to enable parallel processing
+        file_list: List of NetCDF file paths or a single file path as a string
+        var_name: Name of the variable to be extracted or a list of variable names, default is None, which means all variables are extracted
+        dim_name: Dimension name used for merging
+        target_filename: Target file name after merging
 
     Example:
-        merge(
-
-
-              chunk_config={"time": 500})
+        merge(file_list, var_name='u', dim_name='time', target_filename='merged.nc')
+        merge(file_list, var_name=['u', 'v'], dim_name='time', target_filename='merged.nc')
+        merge(file_list, var_name=None, dim_name='time', target_filename='merged.nc')
     """
-
-
-
-    static_vars = _identify_static_vars(file_list[0], var_names, dim_name)
-
-    # Estimate required memory for processing
-    _estimate_memory_usage(file_list, var_names, chunk_config)
-
-    # ------------------------ Data validation phase ------------------------#
-    if sanity_check:
-        _perform_sanity_checks(file_list, var_names, dim_name, static_vars)
-
-    # ------------------------ Core merging logic ------------------------#
-    with xr.set_options(keep_attrs=True):  # Preserve metadata attributes
-        # Merge dynamic variables
-        merged_ds = xr.open_mfdataset(
-            file_list,
-            combine="nested",
-            concat_dim=dim_name,
-            data_vars=[var for var in var_names if var not in static_vars],
-            chunks=chunk_config,
-            parallel=parallel,
-            preprocess=lambda ds: ds[var_names],  # Only load target variables
-        )
-
-        # Process static variables
-        if static_vars:
-            with xr.open_dataset(file_list[0], chunks=chunk_config) as ref_ds:
-                merged_ds = merged_ds.assign({var: ref_ds[var] for var in static_vars})
-
-    # ------------------------ Time dimension processing ------------------------#
-    if dim_name == "time":
-        merged_ds = _process_time_dimension(merged_ds)
-
-    # ------------------------ File output ------------------------#
-    encoding = _generate_encoding_config(merged_ds, compression)
-    _write_to_netcdf(merged_ds, target_filename, encoding)
-
-
-# ------------------------ Helper functions ------------------------#
-def _validate_and_preprocess_inputs(file_list: Union[str, List[str]], target_filename: str, overwrite: bool) -> List[str]:
-    """Input parameter validation and preprocessing"""
-    if not file_list:
-        raise ValueError("File list cannot be empty")
+
+    if target_filename is None:
+        target_filename = "merged.nc"
 
-
-
-
-
+    # Ensure the target directory exists
+    target_dir = os.path.dirname(target_filename)
+    if target_dir and not os.path.exists(target_dir):
+        os.makedirs(target_dir)
 
-
-
-
-    if os.path.exists(target_filename):
-        if overwrite:
-            logger.warning(f"Overwriting existing file: {target_filename}")
-            os.remove(target_filename)
-        else:
-            raise FileExistsError(f"Target file already exists: {target_filename}")
-
-    return file_list
-
-
-def _determine_variables(file_list: List[str], var_name: Union[str, List[str], None]) -> tuple:
-    """Determine the list of variables to process"""
-    with xr.open_dataset(file_list[0]) as ds:
-        all_vars = list(ds.data_vars.keys())
+    if isinstance(file_list, str):
+        file_list = [file_list]
 
+    # Initialize the list of variable names
     if var_name is None:
-
+        with xr.open_dataset(file_list[0]) as ds:
+            var_names = list(ds.variables.keys())
     elif isinstance(var_name, str):
-
-            raise ValueError(f"Invalid variable name: {var_name}")
-        return all_vars, [var_name]
+        var_names = [var_name]
     elif isinstance(var_name, list):
-
-            logger.warning("Empty variable list provided, will use all variables")
-            return all_vars, all_vars
-        invalid_vars = set(var_name) - set(all_vars)
-        if invalid_vars:
-            raise ValueError(f"Invalid variable names: {invalid_vars}")
-        return all_vars, var_name
+        var_names = var_name
     else:
-        raise
-
-
-def _identify_static_vars(sample_file: str, var_names: List[str], dim_name: str) -> List[str]:
-    """Identify static variables"""
-    with xr.open_dataset(sample_file) as ds:
-        return [var for var in var_names if dim_name not in ds[var].dims]
+        raise ValueError("var_name must be a string, a list of strings, or None")
 
+    # Initialize the merged-data dictionary
+    merged_data = {}
 
-
-
-    logger.info("Performing data integrity validation...")
-
-    # Check consistency of static variables
-    with xr.open_dataset(file_list[0]) as ref_ds:
-        for var in static_vars:
-            ref = ref_ds[var]
-            for f in file_list[1:]:
-                with xr.open_dataset(f) as ds:
-                    if not ref.equals(ds[var]):
-                        raise ValueError(f"Static variable {var} inconsistent\nReference file: {file_list[0]}\nProblem file: {f}")
-
-    # Check dimensions of dynamic variables
-    dim_sizes = {}
-    for f in file_list:
-        with xr.open_dataset(f) as ds:
+    for i, file in pbar(enumerate(file_list), description="Reading files", color="#f8bbd0", total=len(file_list)):
+        with xr.open_dataset(file) as ds:
             for var in var_names:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    return encoding
-
-def _calculate_file_size(filepath: str) -> str:
-    """Calculate file size with adaptive unit conversion"""
-    if os.path.exists(filepath):
-        size_in_bytes = os.path.getsize(filepath)
-        if size_in_bytes < 1e3:
-            return f"{size_in_bytes:.2f} B"
-        elif size_in_bytes < 1e6:
-            return f"{size_in_bytes / 1e3:.2f} KB"
-        elif size_in_bytes < 1e9:
-            return f"{size_in_bytes / 1e6:.2f} MB"
-        else:
-            return f"{size_in_bytes / 1e9:.2f} GB"
-    else:
-        raise FileNotFoundError(f"File not found: {filepath}")
-
-def _write_to_netcdf(ds: xr.Dataset, filename: str, encoding: Dict) -> None:
-    """Improved safe writing to NetCDF file"""
-    logger.info("Starting file write...")
-    unlimited_dims = [dim for dim in ds.dims if ds[dim].encoding.get("unlimited", False)]
-
-    delayed = ds.to_netcdf(filename, encoding=encoding, compute=False, unlimited_dims=unlimited_dims)
-
-    try:
+                data_var = ds[var]
+                if dim_name in data_var.dims:
+                    merged_data.setdefault(var, []).append(data_var)
+                elif var not in merged_data:
+                    merged_data[var] = data_var.fillna(0)  # Fill NaN values with 0
+
+    for var in pbar(merged_data, description="Merging variables", color="#9b45d1"):
+        if isinstance(merged_data[var], list):
+            merged_data[var] = xr.concat(merged_data[var], dim=dim_name).fillna(0)
+        # print(f"Variable '{var}' merged: min={merged_data[var].min().values:.3f}, max={merged_data[var].max().values:.3f}, mean={merged_data[var].mean().values:.3f}")
+
+    # Modify the data-writing section to support compression and to set the offset and scale factor
+    # print("\nWriting data to file ...")
+    if os.path.exists(target_filename):
+        print("Warning: The target file already exists. Removing it ...")
+        os.remove(target_filename)
+
+    with xr.Dataset(merged_data) as merged_dataset:
+        encoding = {}
+        for var in merged_dataset.data_vars:
+            data = merged_dataset[var].values
+            # print(f"Variable '{var}' ready for writing: min={data.min():.3f}, max={data.max():.3f}, mean={data.mean():.3f}")
+            if data.dtype.kind in {"i", "u", "f"}:  # Only compress numeric data
+                data_range = data.max() - data.min()
+                if data_range > 0:  # Avoid precision problems when the range is too small
+                    scale_factor = data_range / (2**16 - 1)
+                    add_offset = data.min()
+                    encoding[var] = {
+                        "zlib": True,
+                        "complevel": 4,
+                        "dtype": "int16",
+                        "scale_factor": scale_factor,
+                        "add_offset": add_offset,
+                        "_FillValue": -32767,
+                    }
+                else:
+                    encoding[var] = {"zlib": True, "complevel": 4}  # Disable scaling when the range is too small
+            else:
+                encoding[var] = {"zlib": True, "complevel": 4}  # No scaling for non-numeric data
+
+        # Make sure the write does not lose data because of encoding problems
+        # merged_dataset.to_netcdf(target_filename, encoding=encoding)
+        delayed_write = merged_dataset.to_netcdf(target_filename, encoding=encoding, compute=False)
         with ProgressBar():
-
-
-            logger.info(f"Merge completed → {filename}")
-            # logger.info(f"File size: {os.path.getsize(filename) / 1e9:.2f}GB")
-            logger.info(f"File size: {_calculate_file_size(filename)}")
-    except MemoryError as e:
-        _handle_write_error(filename, "Insufficient memory to complete file write. Try adjusting chunk_config parameter to reduce memory usage", e)
-    except Exception as e:
-        _handle_write_error(filename, f"Failed to write file: {str(e)}", e)
-
-
-def _handle_write_error(filename: str, message: str, exception: Exception) -> None:
-    """Unified handling of file write exceptions"""
-    logger.error(message)
-    if os.path.exists(filename):
-        os.remove(filename)
-    raise exception
+            delayed_write.compute()
 
-
-def _estimate_memory_usage(file_list: List[str], var_names: List[str], chunk_config: Dict) -> None:
-    """Improved memory usage estimation"""
-    try:
-        total_size = 0
-        sample_file = file_list[0]
-        with xr.open_dataset(sample_file) as ds:
-            for var in var_names:
-                if var in ds:
-                    # Consider variable dimension sizes
-                    var_size = np.prod([ds[var].sizes[dim] for dim in ds[var].dims]) * ds[var].dtype.itemsize
-                    total_size += var_size * len(file_list)
-
-        # Estimate memory usage during Dask processing (typically 2-3x original data)
-        estimated_memory = total_size * 3
-
-        if estimated_memory > 8e9:
-            logger.warning(f"Estimated memory usage may be high (approx. {estimated_memory / 1e9:.1f}GB). If memory issues occur, adjust chunk_config parameter: {chunk_config}")
-    except Exception as e:
-        logger.debug(f"Memory estimation failed: {str(e)}")
+    print(f'\nFile "{target_filename}" has been successfully created.')
 
 
+# Example usage
 if __name__ == "__main__":
-
-
-
-    # Example 1: basic usage - merge all variables
-    print("\n" + "=" * 40)
-    print("Example 1: merge all variables (default configuration)")
-    merge(file_list=sample_files, target_filename="merged_all_vars.nc")
-
-    # Example 2: merge selected variables
-    print("\n" + "=" * 40)
-    print("Example 2: merge selected variables (temperature, salinity)")
-    merge(
-        file_list=sample_files,
-        var_name=["temperature", "salinity"],
-        target_filename="merged_selected_vars.nc",
-        chunk_config={"time": 500},  # More conservative memory allocation
-    )
-
-    # Example 3: custom compression configuration
-    print("\n" + "=" * 40)
-    print("Example 3: custom compression parameters")
-    merge(file_list=sample_files, var_name="chlorophyll", compression={"chlorophyll": {"zlib": True, "complevel": 5, "dtype": "float32"}}, target_filename="merged_compressed.nc")
-
-    # Example 4: handling large datasets
-    print("\n" + "=" * 40)
-    print("Example 4: chunking strategy for large files")
-    merge(file_list=sample_files, chunk_config={"time": 2000, "lat": 100, "lon": 100}, target_filename="merged_large_dataset.nc", parallel=True)
-
-    # Example 5: special handling of the time dimension
-    print("\n" + "=" * 40)
-    print("Example 5: sort and deduplicate the time dimension")
-    merge(
-        file_list=sample_files,
-        dim_name="time",
-        target_filename="merged_time_processed.nc",
-        sanity_check=True,  # Force data validation
-    )
-
-    # Example 6: overwrite an existing file
-    print("\n" + "=" * 40)
-    print("Example 6: force overwrite of an existing file")
-    try:
-        merge(
-            file_list=sample_files,
-            target_filename="merged_all_vars.nc",  # Same file name as Example 1
-            overwrite=True,  # Explicitly enable overwriting
-        )
-    except FileExistsError as e:
-        print(f"Caught unexpected exception: {str(e)}")
-
-    # Example 7: disable parallel processing
-    print("\n" + "=" * 40)
-    print("Example 7: run in single-threaded mode")
-    merge(file_list=sample_files, target_filename="merged_single_thread.nc", parallel=False)
-
-    # Example 8: handling special dimensions
-    print("\n" + "=" * 40)
-    print("Example 8: merge along the depth dimension")
-    merge(file_list=sample_files, dim_name="depth", var_name=["density", "oxygen"], target_filename="merged_by_depth.nc")
-
-    # Example 9: mixed variable types
-    print("\n" + "=" * 40)
-    print("Example 9: mixed static/dynamic variables")
-    merge(
-        file_list=sample_files,
-        var_name=["bathymetry", "temperature"],  # bathymetry is a static variable
-        target_filename="merged_mixed_vars.nc",
-        sanity_check=True,  # Verify consistency of static variables
-    )
-
-    # Example 10: full configuration demo
-    print("\n" + "=" * 40)
-    print("Example 10: demo with all parameters configured")
-    merge(
-        file_list=sample_files,
-        var_name=None,  # All variables
-        dim_name="time",
-        target_filename="merged_full_config.nc",
-        chunk_config={"time": 1000, "lat": 500, "lon": 500},
-        compression={"temperature": {"complevel": 4}, "salinity": {"zlib": False}},
-        sanity_check=True,
-        overwrite=True,
-        parallel=True,
-    )
+    files_to_merge = ["file1.nc", "file2.nc", "file3.nc"]
+    output_path = "merged_output.nc"
+    merge_nc(files_to_merge, var_name=None, dim_name="time", target_filename=output_path)
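The rewritten merge path above reads each file once, groups DataArrays per variable, concatenates them along `dim_name`, and writes the result with int16 scale/offset packing. Below is a minimal, hypothetical sketch of exercising it end to end; the sample file names and the `temp` variable are invented for illustration, and `merge_nc` is imported from the module path shown in this diff (whether the package re-exports it elsewhere is not assumed here).

```python
# Hypothetical usage sketch for the merge_nc added in this diff.
# Assumptions: file names and the "temp" variable are placeholders; merge_nc is
# imported from the _script module path shown above.
import numpy as np
import xarray as xr

from oafuncs._script.netcdf_merge import merge_nc

# Build three tiny single-time-step files that share a "temp" variable.
paths = []
for i in range(3):
    ds = xr.Dataset(
        {"temp": (("time", "lat", "lon"), np.random.rand(1, 4, 5).astype("float32"))},
        coords={"time": [i], "lat": np.arange(4), "lon": np.arange(5)},
    )
    path = f"sample_{i}.nc"
    ds.to_netcdf(path)
    paths.append(path)

# Concatenate "temp" along "time"; numeric variables are stored as int16 with
# scale_factor = (max - min) / (2**16 - 1), add_offset = min, _FillValue = -32767.
merge_nc(paths, var_name="temp", dim_name="time", target_filename="merged_sample.nc")

# Expect sizes of time: 3, lat: 4, lon: 5 in the merged file.
print(xr.open_dataset("merged_sample.nc")["temp"].sizes)
```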
oafuncs/_script/netcdf_modify.py
ADDED
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Author: Liu Kun && 16031215@qq.com
+Date: 2025-04-05 14:00:50
+LastEditors: Liu Kun && 16031215@qq.com
+LastEditTime: 2025-04-05 14:00:50
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\netcdf_modify.py
+Description:
+EditPlatform: vscode
+ComputerInfo: XPS 15 9510
+SystemInfo: Windows 11
+Python Version: 3.12
+"""
+
+import os
+
+import netCDF4 as nc
+import numpy as np
+from rich import print
+
+
+def _is_valid_netcdf_file(file_path):
+    """
+    Check if the file is a valid NetCDF file.
+    """
+    try:
+        with nc.Dataset(file_path, "r") as _:
+            pass
+        return True
+    except Exception:
+        return False
+
+
+def _modify_var(nc_file_path, variable_name, new_value):
+    """
+    Modify the value of a variable in a NetCDF file.
+    """
+    if not os.path.exists(nc_file_path):
+        raise FileNotFoundError(f"NetCDF file '{nc_file_path}' does not exist.")
+    if not _is_valid_netcdf_file(nc_file_path):
+        raise ValueError(f"File '{nc_file_path}' is not a valid NetCDF file.")
+    if not variable_name:
+        raise ValueError("Variable name cannot be empty or None.")
+    if not isinstance(new_value, np.ndarray):
+        raise TypeError("New value must be a numpy.ndarray.")
+
+    try:
+        with nc.Dataset(nc_file_path, "r+") as dataset:
+            if variable_name not in dataset.variables:
+                raise ValueError(f"Variable '{variable_name}' not found in the NetCDF file.")
+            variable = dataset.variables[variable_name]
+            if variable.shape != new_value.shape:
+                raise ValueError(f"Shape mismatch: Variable '{variable_name}' has shape {variable.shape}, but new value has shape {new_value.shape}.")
+            variable[:] = new_value
+        print(f"[green]Successfully modified variable '{variable_name}' in '{nc_file_path}'.[/green]")
+        return True
+    except (FileNotFoundError, ValueError, TypeError) as e:
+        print(f"[red]Error:[/red] {e}")
+        return False
+    except Exception as e:
+        print(f"[red]Unexpected Error:[/red] Failed to modify variable '{variable_name}' in '{nc_file_path}'. [bold]Details:[/bold] {e}")
+        return False
+
+
+def _modify_attr(nc_file_path, variable_name, attribute_name, attribute_value):
+    """
+    Add or modify an attribute of a variable in a NetCDF file.
+    """
+    if not os.path.exists(nc_file_path):
+        raise FileNotFoundError(f"NetCDF file '{nc_file_path}' does not exist.")
+    if not _is_valid_netcdf_file(nc_file_path):
+        raise ValueError(f"File '{nc_file_path}' is not a valid NetCDF file.")
+    if not variable_name:
+        raise ValueError("Variable name cannot be empty or None.")
+    if not attribute_name:
+        raise ValueError("Attribute name cannot be empty or None.")
+
+    try:
+        with nc.Dataset(nc_file_path, "r+") as ds:
+            if variable_name not in ds.variables:
+                raise ValueError(f"Variable '{variable_name}' not found in the NetCDF file.")
+            variable = ds.variables[variable_name]
+            variable.setncattr(attribute_name, attribute_value)
+        print(f"[green]Successfully modified attribute '{attribute_name}' of variable '{variable_name}' in '{nc_file_path}'.[/green]")
+        return True
+    except (FileNotFoundError, ValueError) as e:
+        print(f"[red]Error:[/red] {e}")
+        return False
+    except Exception as e:
+        print(f"[red]Unexpected Error:[/red] Failed to modify attribute '{attribute_name}' of variable '{variable_name}' in file '{nc_file_path}'. [bold]Details:[/bold] {e}")
+        return False
+
+
+def modify_nc(nc_file, var_name, attr_name=None, new_value=None):
+    """
+    Modify the value of a variable or the value of an attribute in a NetCDF file.
+    """
+    try:
+        if attr_name is None:
+            return _modify_var(nc_file, var_name, new_value)
+        else:
+            return _modify_attr(nc_file, var_name, attr_name, new_value)
+    except Exception as e:
+        print(f"[red]Error:[/red] An error occurred while modifying '{var_name}' in '{nc_file}'. [bold]Details:[/bold] {e}")
+        return False
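A short, hypothetical usage sketch of `modify_nc` follows; the file, variable, and attribute names are placeholders, and the import path is simply the module added above. It shows both branches: with `attr_name` omitted the call is routed to `_modify_var` (values are replaced, shapes must match), and with `attr_name` given it is routed to `_modify_attr`.

```python
# Hypothetical usage sketch for modify_nc; names below are placeholders.
import netCDF4 as nc
import numpy as np

from oafuncs._script.netcdf_modify import modify_nc

# Create a toy file with one 2x3 float variable.
with nc.Dataset("demo_modify.nc", "w", format="NETCDF4") as ds:
    ds.createDimension("y", 2)
    ds.createDimension("x", 3)
    var = ds.createVariable("temp", "f4", ("y", "x"))
    var[:] = np.zeros((2, 3), dtype="float32")

# attr_name omitted -> _modify_var: replace the variable's values (shape must match).
modify_nc("demo_modify.nc", "temp", new_value=np.arange(6, dtype="float32").reshape(2, 3))

# attr_name given -> _modify_attr: add or update an attribute on the variable.
modify_nc("demo_modify.nc", "temp", attr_name="units", new_value="degC")
```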
oafuncs/_script/netcdf_write.py
ADDED
@@ -0,0 +1,125 @@
+import numpy as np
+import os
+import netCDF4 as nc
+import xarray as xr
+
+
+def _numpy_to_nc_type(numpy_type):
+    """Map NumPy data types to NetCDF data types"""
+    numpy_to_nc = {
+        "float32": "f4",
+        "float64": "f8",
+        "int8": "i1",
+        "int16": "i2",
+        "int32": "i4",
+        "int64": "i8",
+        "uint8": "u1",
+        "uint16": "u2",
+        "uint32": "u4",
+        "uint64": "u8",
+    }
+    # Make sure the input is a string; convert it if it is not
+    numpy_type_str = str(numpy_type) if not isinstance(numpy_type, str) else numpy_type
+    return numpy_to_nc.get(numpy_type_str, "f4")  # Default to 'float32'
+
+
+def _calculate_scale_and_offset(data, n=16):
+    if not isinstance(data, np.ndarray):
+        raise ValueError("Input data must be a NumPy array.")
+
+    # Use nanmin/nanmax so NaN values do not affect min and max
+    data_min = np.nanmin(data)
+    data_max = np.nanmax(data)
+
+    if np.isnan(data_min) or np.isnan(data_max):
+        raise ValueError("Input data contains NaN values, which are not allowed.")
+
+    scale_factor = (data_max - data_min) / (2**n - 1)
+    add_offset = data_min + 2 ** (n - 1) * scale_factor
+
+    return scale_factor, add_offset
+
+
+def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_switch=True, compile_switch=True):
+    """
+    Description:
+        Write data to NetCDF file
+    Parameters:
+        file: str, file path
+        data: data
+        varname: str, variable name
+        coords: dict, coordinates, key is the dimension name, value is the coordinate data
+        mode: str, write mode, 'w' for write, 'a' for append
+        scale_offset_switch: bool, whether to use scale_factor and add_offset, default is True
+        compile_switch: bool, whether to use compression parameters, default is True
+    Example:
+        save(r'test.nc', data, 'u', {'time': np.linspace(0, 120, 100), 'lev': np.linspace(0, 120, 50)}, 'a')
+    """
+    # Set compression parameters
+    kwargs = {"zlib": True, "complevel": 4} if compile_switch else {}
+
+    # Check whether the file exists and decide what to do based on the mode
+    if mode == "w" and os.path.exists(file):
+        os.remove(file)
+    elif mode == "a" and not os.path.exists(file):
+        mode = "w"
+
+    # Open the NetCDF file
+    with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
+        # If data is a DataArray and neither varname nor coords is provided
+        if varname is None and coords is None and isinstance(data, xr.DataArray):
+            encoding = {}
+            for var in data.data_vars:
+                scale_factor, add_offset = _calculate_scale_and_offset(data[var].values)
+                encoding[var] = {
+                    "zlib": True,
+                    "complevel": 4,
+                    "dtype": "int16",
+                    "scale_factor": scale_factor,
+                    "add_offset": add_offset,
+                    "_FillValue": -32767,
+                }
+            data.to_netcdf(file, mode=mode, encoding=encoding)
+            return
+
+        # Add coordinates
+        for dim, coord_data in coords.items():
+            if dim in ncfile.dimensions:
+                if len(coord_data) != len(ncfile.dimensions[dim]):
+                    raise ValueError(f"Length of coordinate '{dim}' does not match the dimension length.")
+                else:
+                    ncfile.variables[dim][:] = np.array(coord_data)
+            else:
+                ncfile.createDimension(dim, len(coord_data))
+                var = ncfile.createVariable(dim, _numpy_to_nc_type(coord_data.dtype), (dim,), **kwargs)
+                var[:] = np.array(coord_data)
+
+                # If the coordinate data has attributes, add them to the NetCDF variable
+                if isinstance(coord_data, xr.DataArray) and coord_data.attrs:
+                    for attr_name, attr_value in coord_data.attrs.items():
+                        var.setncattr(attr_name, attr_value)
+
+        # Add or update the variable
+        if varname in ncfile.variables:
+            if data.shape != ncfile.variables[varname].shape:
+                raise ValueError(f"Shape of data does not match the variable shape for '{varname}'.")
+            ncfile.variables[varname][:] = np.array(data)
+        else:
+            # Create the variable
+            dim_names = tuple(coords.keys())
+            if scale_offset_switch:
+                scale_factor, add_offset = _calculate_scale_and_offset(np.array(data))
+                dtype = "i2"
+                var = ncfile.createVariable(varname, dtype, dim_names, fill_value=-32767, **kwargs)
+                var.setncattr("scale_factor", scale_factor)
+                var.setncattr("add_offset", add_offset)
+            else:
+                dtype = _numpy_to_nc_type(data.dtype)
+                var = ncfile.createVariable(varname, dtype, dim_names, **kwargs)
+            var[:] = np.array(data)
+
+            # Add attributes
+            if isinstance(data, xr.DataArray) and data.attrs:
+                for key, value in data.attrs.items():
+                    if key not in ["scale_factor", "add_offset", "_FillValue", "missing_value"] or not scale_offset_switch:
+                        var.setncattr(key, value)
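The Example line in the docstring above can be expanded into a runnable sketch. The file and variable names below are placeholders and the import path is the module added in this diff; with `scale_offset_switch=True` the variable is written as int16 using scale_factor = (max - min) / (2**n - 1) and add_offset = min + 2**(n-1) * scale_factor from `_calculate_scale_and_offset`, and with the switch off the native dtype is kept.

```python
# Hypothetical usage sketch for save_to_nc; file and variable names are placeholders.
import numpy as np

from oafuncs._script.netcdf_write import save_to_nc

time = np.linspace(0, 120, 100)
lev = np.linspace(0, 120, 50)
u = np.random.rand(100, 50).astype("float32")

# Packed write: "u" is stored as int16 with scale_factor/add_offset attributes.
save_to_nc("test_write.nc", u, varname="u", coords={"time": time, "lev": lev}, mode="w")

# Plain write: keep the native float32 dtype, compression only.
save_to_nc("test_write_raw.nc", u, varname="u", coords={"time": time, "lev": lev}, mode="w", scale_offset_switch=False)
```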