oafuncs 0.0.98.20__py3-none-any.whl → 0.0.98.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/_script/netcdf_merge.py +60 -31
- oafuncs/_script/netcdf_write.py +293 -79
- oafuncs/oa_cmap.py +18 -9
- oafuncs/oa_down/hycom_3hourly.py +4 -20
- oafuncs/oa_draw.py +2 -2
- oafuncs/oa_nc.py +73 -6
- oafuncs/oa_tool.py +1 -1
- {oafuncs-0.0.98.20.dist-info → oafuncs-0.0.98.22.dist-info}/METADATA +1 -1
- {oafuncs-0.0.98.20.dist-info → oafuncs-0.0.98.22.dist-info}/RECORD +12 -12
- {oafuncs-0.0.98.20.dist-info → oafuncs-0.0.98.22.dist-info}/WHEEL +1 -1
- {oafuncs-0.0.98.20.dist-info → oafuncs-0.0.98.22.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.20.dist-info → oafuncs-0.0.98.22.dist-info}/top_level.txt +0 -0
oafuncs/_script/netcdf_merge.py
CHANGED
@@ -1,7 +1,7 @@
 import logging
 import os
 from typing import List, Optional, Union
-
+
 import xarray as xr

 from oafuncs import pbar
@@ -25,7 +25,6 @@ def merge_nc(file_list: Union[str, List[str]], var_name: Optional[Union[str, Lis
     merge(file_list, var_name=['u', 'v'], dim_name='time', target_filename='merged.nc')
     merge(file_list, var_name=None, dim_name='time', target_filename='merged.nc')
     """
-
    from oafuncs._script.netcdf_write import save_to_nc

     if target_filename is None:
         target_filename = "merged.nc"
@@ -52,48 +51,78 @@ def merge_nc(file_list: Union[str, List[str]], var_name: Optional[Union[str, Lis
     # Initialize the merged-data dictionary
     merged_data = {}

-    for i, file in pbar(enumerate(file_list),
+    for i, file in pbar(enumerate(file_list), "Reading files", total=len(file_list)):
         with xr.open_dataset(file) as ds:
             for var in var_names:
                 data_var = ds[var]
                 if dim_name in data_var.dims:
                     merged_data.setdefault(var, []).append(data_var)
                 elif var not in merged_data:
-                    #
-
-
-
-
-
-
-
-
-
+                    # Merge only; NaN filling is handled entirely by netcdf_write.py
+                    merged_data[var] = data_var
+
+    # Record each variable's fill value and missing value so they are not lost
+    fill_values = {}
+    missing_values = {}
+    for var_name, var_data in merged_data.items():
+        if isinstance(var_data, list) and var_data:
+            # For variables to be concatenated, inspect the first element's attributes
+            attrs = var_data[0].attrs
+            if "_FillValue" in attrs:
+                fill_values[var_name] = attrs["_FillValue"]
+            if "missing_value" in attrs:
+                missing_values[var_name] = attrs["missing_value"]
+        else:
+            # For a single variable, inspect its attributes directly
+            attrs = var_data.attrs if hasattr(var_data, "attrs") else {}
+            if "_FillValue" in attrs:
+                fill_values[var_name] = attrs["_FillValue"]
+            if "missing_value" in attrs:
+                missing_values[var_name] = attrs["missing_value"]
+
+    for var in pbar(merged_data, "Merging variables"):
         if isinstance(merged_data[var], list):
-            #
-
-
-
-
-
-
-
-
-            # Before building the final dataset, check again that no numeric variable contains NaN
+            # Use coords='minimal' instead of the default, and drop the potentially conflicting compat='override'
+            merged_data[var] = xr.concat(merged_data[var], dim=dim_name, coords="minimal")
+            # Restore the original fill-value and missing-value attributes
+            if var in fill_values:
+                merged_data[var].attrs["_FillValue"] = fill_values[var]
+            if var in missing_values:
+                merged_data[var].attrs["missing_value"] = missing_values[var]
+
+    # Build the Dataset after merging; merged_data now holds only data variables, no coordinate variables
     merged_ds = xr.Dataset(merged_data)
-
-
-
-
-
-
+
+    # Back-fill coordinate variables (time, lat, lon, ...), taking the first file as reference
+    with xr.open_dataset(file_list[0]) as ds0:
+        for coord in ds0.coords:
+            # Never overwrite an existing coordinate; keep dtypes and attributes consistent
+            if coord not in merged_ds.coords:
+                merged_ds = merged_ds.assign_coords({coord: ds0[coord]})
+
+    """ # Revised merge-dimension validation: check dimension compatibility across all files more sensibly
+    if dim_name in merged_ds.coords and len(file_list) > 1:
+        logging.info(f"Checking the validity of merge dimension {dim_name} ...")
+
+        # Collect this dimension's values from every file
+        all_dim_values = []
+        for file in file_list:
+            with xr.open_dataset(file) as ds:
+                if dim_name in ds.coords:
+                    all_dim_values.append(ds[dim_name].values)
+
+        # Warn only when there are two or more distinct value sets
+        unique_values_count = len({tuple(vals.tolist()) if hasattr(vals, "tolist") else tuple(vals) for vals in all_dim_values})
+        if unique_values_count > 1:
+            logging.warning(f"Detected {unique_values_count} distinct sets of {dim_name} coordinate values; merging may reorder the data")
+        else:
+            logging.info(f"All files share identical {dim_name} coordinate values; merging keeps the original order") """

     if os.path.exists(target_filename):
-        # print("Warning: The target file already exists. Removing it ...")
         logging.warning("The target file already exists. Removing it ...")
         os.remove(target_filename)

-
+    merged_ds.to_netcdf(target_filename, mode="w")


 # Example usage
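Taken together, the new merge path concatenates each variable along the record dimension and then back-fills coordinates from the first file. A usage sketch (toy files; the names are placeholders, not part of the package):

```python
import numpy as np
import xarray as xr
from oafuncs._script.netcdf_merge import merge_nc

# Two single-step files on the same grid (placeholder names).
for i, t in enumerate(["2024-01-01", "2024-01-02"]):
    ds = xr.Dataset(
        {"u": (("time", "lat"), np.random.rand(1, 5).astype(np.float32))},
        coords={"time": [np.datetime64(t)], "lat": np.arange(5)},
    )
    ds.to_netcdf(f"part_{i}.nc")

# Concatenates 'u' along time with coords="minimal"; 'lat' is back-filled from part_0.nc.
merge_nc(["part_0.nc", "part_1.nc"], var_name="u", dim_name="time", target_filename="merged.nc")
```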
oafuncs/_script/netcdf_write.py
CHANGED
@@ -8,7 +8,8 @@ import xarray as xr
 warnings.filterwarnings("ignore", category=RuntimeWarning)


-def _nan_to_fillvalue(ncfile):
+
+def _nan_to_fillvalue(ncfile, set_fill_value):
     """
     Replace NaN and masked values of every variable in a NetCDF file with its _FillValue attribute (adding _FillValue=-32767 automatically if absent, then substituting).
     Handles invalid values in masked arrays as well.
@@ -27,8 +28,10 @@ def _nan_to_fillvalue(ncfile):
             # Determine the fill value
             if "_FillValue" in var.ncattrs():
                 fill_value = var.getncattr("_FillValue")
+            elif hasattr(var, "missing_value"):
+                fill_value = var.getncattr("missing_value")
             else:
-                fill_value = -32767
+                fill_value = set_fill_value
             try:
                 var.setncattr("_FillValue", fill_value)
             except Exception:
@@ -39,11 +42,10 @@ def _nan_to_fillvalue(ncfile):
         if hasattr(arr, "mask"):
             # For masked arrays, set masked positions to fill_value
             if np.any(arr.mask):
-
-                arr = np.ma.filled(arr, fill_value=fill_value)
+                arr = np.where(arr.mask, fill_value, arr.data if hasattr(arr, "data") else arr)

-        # Handle remaining NaNs
-        if np.any(np.isnan(arr)):
+        # Handle remaining NaN and infinite values
+        if arr.dtype.kind in ["f", "i", "u"] and np.any(~np.isfinite(arr)):
             arr = np.nan_to_num(arr, nan=fill_value, posinf=fill_value, neginf=fill_value)

         # Write the data back to the variable
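The scrubbing pass is self-contained enough to restate outside the package; a minimal sketch of the same idea (hypothetical helper name and file handling, not the package function itself):

```python
import netCDF4 as nc
import numpy as np

def scrub_nan(path, default_fill=-32767.0):
    # Hypothetical helper mirroring _nan_to_fillvalue: replace NaN/Inf and masked
    # entries of every float variable with that variable's _FillValue.
    with nc.Dataset(path, "r+") as ds:
        for var in ds.variables.values():
            if var.dtype.kind != "f":
                continue
            fill = var.getncattr("_FillValue") if "_FillValue" in var.ncattrs() else default_fill
            arr = var[:]  # netCDF4 returns a masked array when invalid values are present
            data = np.ma.filled(arr, fill)  # masked positions -> fill value
            var[:] = np.nan_to_num(data, nan=fill, posinf=fill, neginf=fill)
```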
@@ -68,60 +70,93 @@ def _numpy_to_nc_type(numpy_type):
     return numpy_to_nc.get(numpy_type_str, "f4")


-def _calculate_scale_and_offset(data,
+def _calculate_scale_and_offset(data, dtype="int32"):
     """
-
-
-    Handles NaN values in the data as well.
+    Compute scale_factor and add_offset from valid data only (excluding NaN, fill values, and custom missing values).
+    Uses the int32 type, n=32.
     """
     if not isinstance(data, np.ndarray):
         raise ValueError("Input data must be a NumPy array.")
+
+    if dtype == "int32":
+        n = 32
+        fill_value = np.iinfo(np.int32).min  # -2147483648
+    elif dtype == "int16":
+        n = 16
+        fill_value = np.iinfo(np.int16).min  # -32768
+    else:
+        raise ValueError("Unsupported dtype. Supported types are 'int16' and 'int32'.")

-    #
-
+    # Valid mask: not NaN, not inf, not fill_value
+    valid_mask = np.isfinite(data) & (data != fill_value)
+    if hasattr(data, "mask") and np.ma.is_masked(data):
+        valid_mask &= ~data.mask

-    # Compute the min/max of valid values (excluding fill values)
-    valid_mask = clean_data != -32767
     if np.any(valid_mask):
-        data_min = np.min(
-        data_max = np.max(
+        data_min = np.min(data[valid_mask]) - 1
+        data_max = np.max(data[valid_mask]) + 1
     else:
-        # All values are fill values; fall back to a default range
         data_min, data_max = 0, 1

+    # Keep the scale from being zero, and make sure scale/offset cannot collide with the fill value
     if data_max == data_min:
         scale_factor = 1.0
         add_offset = data_min
     else:
-        scale_factor = (data_max - data_min) / (2**n -
-        add_offset =
+        scale_factor = (data_max - data_min) / (2**n - 2)
+        add_offset = (data_max + data_min) / 2.0
     return scale_factor, add_offset


-def _data_to_scale_offset(data, scale, offset):
+def _data_to_scale_offset(data, scale, offset, dtype="int32"):
     """
-
-
-
+    Scale valid data only; NaN/inf/fill values are assigned fill_value directly.
+    Values in masked regions are kept and scaled, unless the mask itself marks them as invalid.
+    Uses the int32 type.
     """
     if not isinstance(data, np.ndarray):
         raise ValueError("Input data must be a NumPy array.")
+
+    if dtype == "int32":
+        # n = 32
+        np_dtype = np.int32
+        fill_value = np.iinfo(np.int32).min  # -2147483648
+        clip_min = np.iinfo(np.int32).min + 1  # -2147483647
+        clip_max = np.iinfo(np.int32).max  # 2147483647
+    elif dtype == "int16":
+        # n = 16
+        np_dtype = np.int16
+        fill_value = np.iinfo(np.int16).min  # -32768
+        clip_min = np.iinfo(np.int16).min + 1  # -32767
+        clip_max = np.iinfo(np.int16).max  # 32767
+    else:
+        raise ValueError("Unsupported dtype. Supported types are 'int16' and 'int32'.")
+
+    # Build the mask; exclude only NaN/inf and the explicit fill value
+    valid_mask = np.isfinite(data)
+    valid_mask &= data != fill_value
+
+    # If the data carry a mask attribute, take that mask into account too
+    if hasattr(data, "mask") and np.ma.is_masked(data):
+        # Only regions flagged by the mask count as invalid
+        valid_mask &= ~data.mask

-
-
-
-
-
-
-
-
+    result = data.copy()
+    if np.any(valid_mask):
+        # The inverse mapping can recover the original values
+        scaled = (data[valid_mask] - offset) / scale
+        scaled = np.round(scaled).astype(np_dtype)
+        # Clip to the integer range, keeping the full span available for the conversion
+        scaled = np.clip(scaled, clip_min, clip_max)  # the type minimum is not used; it is reserved as _FillValue
+        result[valid_mask] = scaled
+    return result


-def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_switch=True, compile_switch=True):
+def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='int32', scale_offset_switch=True, compile_switch=True, preserve_mask_values=True):
     """
     Save data to a NetCDF file; supports xarray objects (DataArray or Dataset) and numpy arrays.

-    Only numeric data variables get the packing conversion (scale_factor/add_offset, then cast to
+    Only numeric data variables get the packing conversion (scale_factor/add_offset, then cast to int32);
     non-numeric data and all coordinate variables are stored as-is with compression disabled.

     Parameters:
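The packing arithmetic above is worth seeing end to end; a round-trip sketch (toy numbers, not package code):

```python
import numpy as np

# Symmetric CF packing as implemented above: the offset sits at the midpoint and
# the scale spans 2**n - 2 steps, so the type minimum (-2**31 for int32) stays
# free for _FillValue and the padded min/max map exactly onto the clip bounds.
data = np.array([12.5, 287.3, 300.0, np.nan])
valid = np.isfinite(data)
dmin, dmax = data[valid].min() - 1, data[valid].max() + 1

scale = (dmax - dmin) / (2**32 - 2)
offset = (dmax + dmin) / 2.0

packed = np.round((data[valid] - offset) / scale).astype(np.int32)
restored = packed * scale + offset
print(np.abs(restored - data[valid]).max())  # quantization error, at most ~scale/2
```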
@@ -130,64 +165,134 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_swi
     - varname: variable name (only used when passing a numpy array or a DataArray)
     - coords: coordinate dict (numpy-array branch only); coordinate variables are never compressed
     - mode: "w" (overwrite) or "a" (append)
+    - convert_dtype: numeric type to convert to ("int16" or "int32"); default "int32"
     - scale_offset_switch: whether to apply the packing conversion to numeric data variables
     - compile_switch: whether to enable NetCDF4 zlib compression (numeric data only)
+    - missing_value: custom missing value; it is replaced by fill_value
+    - preserve_mask_values: whether to keep original values in masked regions (True) or replace them with the fill value (False)
     """
+    if convert_dtype not in ["int16", "int32"]:
+        convert_dtype = "int32"
+    nc_dtype = _numpy_to_nc_type(convert_dtype)
+    # fill_value = np.iinfo(np.convert_dtype).min  # -2147483648 or -32768
+    # fill_value = np.iinfo(eval('np.' + convert_dtype)).min  # -2147483648 or -32768
+    np_dtype = getattr(np, convert_dtype)  # safer way to obtain the type
+    fill_value = np.iinfo(np_dtype).min
+    # ----------------------------------------------------------------------------
     # Handle xarray objects (DataArray or Dataset)
     if isinstance(data, (xr.DataArray, xr.Dataset)):
-        encoding = {}
+        encoding = {}

         if isinstance(data, xr.DataArray):
             if data.name is None:
                 data = data.rename("data")
             varname = data.name if varname is None else varname
-
-
-
-
-
+            arr = np.array(data.values)
+            try:
+                data_missing_val = data.attrs.get("missing_value")
+            except AttributeError:
+                data_missing_val = data.attrs.get("_FillValue", None)
+            # Compute scale/offset from valid data only
+            valid_mask = np.ones(arr.shape, dtype=bool)  # all values valid by default
+            if arr.dtype.kind in ["f", "i", "u"]:  # apply isfinite to numeric data only
+                valid_mask = np.isfinite(arr)
+                if data_missing_val is not None:
+                    valid_mask &= arr != data_missing_val
+                if hasattr(arr, "mask"):
+                    valid_mask &= ~getattr(arr, "mask", False)
+            if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
+                arr_valid = arr[valid_mask]
+                scale, offset = _calculate_scale_and_offset(arr_valid, convert_dtype)
+                # Deal with invalid values right before writing (only here!)
+                arr_to_save = arr.copy()
+                # Handle the custom missing value
+                if data_missing_val is not None:
+                    arr_to_save[arr == data_missing_val] = fill_value
+                # Handle NaN/inf
+                arr_to_save[~np.isfinite(arr_to_save)] = fill_value
+                new_values = _data_to_scale_offset(arr_to_save, scale, offset)
                 new_da = data.copy(data=new_values)
+                # Drop the _FillValue and missing_value attributes
+                for k in ["_FillValue", "missing_value"]:
+                    if k in new_da.attrs:
+                        del new_da.attrs[k]
                 new_da.attrs["scale_factor"] = float(scale)
                 new_da.attrs["add_offset"] = float(offset)
                 encoding[varname] = {
                     "zlib": compile_switch,
                     "complevel": 4,
-                    "dtype":
-                    "_FillValue": -
+                    "dtype": nc_dtype,
+                    # "_FillValue": -2147483648,
                 }
                 new_da.to_dataset(name=varname).to_netcdf(file, mode=mode, encoding=encoding)
             else:
+                for k in ["_FillValue", "missing_value"]:
+                    if k in data.attrs:
+                        del data.attrs[k]
                 data.to_dataset(name=varname).to_netcdf(file, mode=mode)
-            _nan_to_fillvalue(file)
+            _nan_to_fillvalue(file, fill_value)
             return

-        else:
-            # Handle the Dataset case: process data_vars only; coordinate variables stay as-is
+        else:  # Dataset case
            new_vars = {}
            encoding = {}
            for var in data.data_vars:
                da = data[var]
-
-
-
-
+                arr = np.array(da.values)
+                try:
+                    data_missing_val = da.attrs.get("missing_value")
+                except AttributeError:
+                    data_missing_val = da.attrs.get("_FillValue", None)
+                valid_mask = np.ones(arr.shape, dtype=bool)  # all values valid by default
+                if arr.dtype.kind in ["f", "i", "u"]:  # apply isfinite to numeric data only
+                    valid_mask = np.isfinite(arr)
+                    if data_missing_val is not None:
+                        valid_mask &= arr != data_missing_val
+                    if hasattr(arr, "mask"):
+                        valid_mask &= ~getattr(arr, "mask", False)
+
+                # Work on a copy of the attributes to avoid mutating the original dataset
+                attrs = da.attrs.copy()
+                for k in ["_FillValue", "missing_value"]:
+                    if k in attrs:
+                        del attrs[k]
+
+                if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
+                    # Edge case: check that any valid data exist at all
+                    if not np.any(valid_mask):
+                        # No valid data: store a plain copy without any conversion
+                        new_vars[var] = xr.DataArray(arr, dims=da.dims, coords=da.coords, attrs=attrs)
+                        continue
+
+                    arr_valid = arr[valid_mask]
+                    scale, offset = _calculate_scale_and_offset(arr_valid, convert_dtype)
+                    arr_to_save = arr.copy()
+
+                    # Same logic as the DataArray branch: run the data through _data_to_scale_offset
+                    # Handle the custom missing value
+                    if data_missing_val is not None:
+                        arr_to_save[arr == data_missing_val] = fill_value
+                    # Handle NaN/inf
+                    arr_to_save[~np.isfinite(arr_to_save)] = fill_value
+                    new_values = _data_to_scale_offset(arr_to_save, scale, offset)
+                    new_da = xr.DataArray(new_values, dims=da.dims, coords=da.coords, attrs=attrs)
                    new_da.attrs["scale_factor"] = float(scale)
                    new_da.attrs["add_offset"] = float(offset)
+                    # Do not set the _FillValue attribute; use missing_value instead
+                    # new_da.attrs["missing_value"] = -2147483648
                    new_vars[var] = new_da
                    encoding[var] = {
                        "zlib": compile_switch,
                        "complevel": 4,
-                        "dtype":
-                        "_FillValue": -32767,
+                        "dtype": nc_dtype,
                    }
                else:
-                    new_vars[var] = da
-
-
-
-        else
-
-        _nan_to_fillvalue(file)  # Replace NaN with _FillValue
+                    new_vars[var] = xr.DataArray(arr, dims=da.dims, coords=da.coords, attrs=attrs)
+
+            # Make sure coordinate variables are copied over correctly
+            new_ds = xr.Dataset(new_vars, coords=data.coords.copy())
+            new_ds.to_netcdf(file, mode=mode, encoding=encoding if encoding else None)
+            _nan_to_fillvalue(file, fill_value)
        return

     # Handle the plain numpy array case
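Put together, the xarray branch is driven as below (a usage sketch; the file and variable names are placeholders):

```python
import numpy as np
import xarray as xr
from oafuncs._script.netcdf_write import save_to_nc

# A small field with both a NaN and a custom missing value.
temp = np.random.rand(4, 3).astype(np.float32)
temp[0, 0] = np.nan
temp[1, 1] = -999.0
da = xr.DataArray(temp, dims=("y", "x"), name="temp", attrs={"missing_value": -999.0})

# Packs "temp" into int32 with scale_factor/add_offset and zlib; NaN and -999.0
# both end up as the int32 minimum, which becomes the file's _FillValue.
save_to_nc("sst_demo.nc", da, convert_dtype="int32", scale_offset_switch=True)
```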
@@ -197,9 +302,16 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_swi
         mode = "w"
     data = np.asarray(data)
     is_numeric = np.issubdtype(data.dtype, np.number)
+
+    if hasattr(data, "mask") and np.ma.is_masked(data):
+        # Masked array: unwrap it and fetch the missing value
+        data = data.data
+        missing_value = getattr(data, "missing_value", None)
+    else:
+        missing_value = None
+
     try:
         with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
-            # Coordinate variables are written directly, without compression
             if coords is not None:
                 for dim, values in coords.items():
                     if dim not in ncfile.dimensions:
@@ -209,45 +321,147 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_swi

             dims = list(coords.keys()) if coords else []
             if is_numeric and scale_offset_switch:
-
-
-
+                arr = np.array(data)
+
+                # Build the valid mask, keeping values in masked regions (when preserve_mask_values is True)
+                valid_mask = np.isfinite(arr)  # excludes NaN and infinite values
+                if missing_value is not None:
+                    valid_mask &= arr != missing_value  # excludes explicit missing values
+
+                # If masked values are not preserved, treat masked regions as invalid
+                if not preserve_mask_values and hasattr(arr, "mask"):
+                    valid_mask &= ~arr.mask
+
+                arr_to_save = arr.copy()
+
+                # Make sure there are valid data at all
+                if not np.any(valid_mask):
+                    # No valid data: skip the packing and save with the original dtype
+                    dtype = _numpy_to_nc_type(data.dtype)
+                    var = ncfile.createVariable(varname, dtype, dims, zlib=False)
+                    # Make sure no NaN is written
+                    clean_data = np.nan_to_num(data, nan=missing_value if missing_value is not None else fill_value)
+                    var[:] = clean_data
+                    return
+
+                # Compute scale and offset from the valid region only
+                arr_valid = arr_to_save[valid_mask]
+                scale, offset = _calculate_scale_and_offset(arr_valid, convert_dtype)
+
+                # Apply the packing conversion
+                new_data = _data_to_scale_offset(arr_to_save, scale, offset)
+
+                # Create the variable and set its attributes
+                var = ncfile.createVariable(varname, nc_dtype, dims, zlib=compile_switch)
                 var.scale_factor = scale
                 var.add_offset = offset
-
+                var._FillValue = fill_value  # set the fill value explicitly
                 var[:] = new_data
             else:
-                # Non-numeric data: compression disabled
                 dtype = _numpy_to_nc_type(data.dtype)
                 var = ncfile.createVariable(varname, dtype, dims, zlib=False)
-
-
+                # Make sure no NaN is written
+                if np.issubdtype(data.dtype, np.floating) and np.any(~np.isfinite(data)):
+                    fill_val = missing_value if missing_value is not None else fill_value
+                    var._FillValue = fill_val
+                    clean_data = np.nan_to_num(data, nan=fill_val)
+                    var[:] = clean_data
+                else:
+                    var[:] = data
+            # Finally make sure every remaining NaN value is handled
+            _nan_to_fillvalue(file, fill_value)
     except Exception as e:
         raise RuntimeError(f"netCDF4 save failed: {str(e)}") from e


+def _compress_netcdf(src_path, dst_path=None, tolerance=1e-10, preserve_mask_values=True):
+    """
+    Compress a NetCDF file, packing the data with scale_factor/add_offset.
+    If dst_path is omitted, a new file name is generated automatically; the original file is then deleted and the new file renamed to the original name.
+    After compression the data are checked for distortion.
+
+    Parameters:
+    - src_path: path of the source NetCDF file
+    - dst_path: path of the compressed file (optional)
+    - tolerance: allowed error when validating the data (default 1e-10)
+    - preserve_mask_values: keep original values in masked regions (True) or replace them with the fill value (False)
+    """
+    # Decide whether the original file is to be replaced
+    delete_orig = dst_path is None
+    if delete_orig:
+        dst_path = src_path.replace(".nc", "_compress.nc")
+    # Open the source file and write the compressed copy
+    ds = xr.open_dataset(src_path)
+    save_to_nc(dst_path, ds, convert_dtype='int32', scale_offset_switch=True, compile_switch=True, preserve_mask_values=preserve_mask_values)
+    ds.close()
+
+    # Verify that the compressed data are not distorted
+    original_ds = xr.open_dataset(src_path)
+    compressed_ds = xr.open_dataset(dst_path)
+    # Validate the data in more detail
+    for var in original_ds.data_vars:
+        original_data = original_ds[var].values
+        compressed_data = compressed_ds[var].values
+        # Skip non-numeric variables
+        if not np.issubdtype(original_data.dtype, np.number):
+            continue
+        # Grab the mask if one exists
+        original_mask = None
+        if hasattr(original_data, "mask") and np.ma.is_masked(original_data):  # fix: make sure this really is a masked array
+            original_mask = original_data.mask.copy()
+        # Check that valid data stay within the allowed error
+        valid_mask = np.isfinite(original_data)
+        if original_mask is not None:
+            valid_mask &= ~original_mask
+        if np.any(valid_mask):
+            if np.issubdtype(original_data.dtype, np.floating):
+                diff = np.abs(original_data[valid_mask] - compressed_data[valid_mask])
+                max_diff = np.max(diff)
+                if max_diff > tolerance:
+                    print(f"Warning: compression error {max_diff} of variable {var} exceeds the tolerance {tolerance}")
+                    if max_diff > tolerance * 10:  # raise on severe deviation
+                        raise ValueError(f"Variable {var} is severely distorted after compression (max_diff={max_diff})")
+            elif np.issubdtype(original_data.dtype, np.integer):
+                # Integer data must match exactly
+                if not np.array_equal(original_data[valid_mask], compressed_data[valid_mask]):
+                    raise ValueError(f"Integer data of variable {var} are inconsistent after compression")
+        # If masked values must be preserved, check the masked region as well
+        if preserve_mask_values and original_mask is not None and np.any(original_mask):
+            # Make sure the original values of the masked region survived
+            # fix: masked arrays may have mismatched dtypes, so add a safety net
+            try:
+                mask_diff = np.abs(original_data[original_mask] - compressed_data[original_mask])
+                if np.any(mask_diff > tolerance):
+                    print(f"Warning: masked-region data of variable {var} changed after compression")
+            except Exception as e:
+                print(f"Warning: comparing masked-region data of variable {var} failed: {str(e)}")
+    original_ds.close()
+    compressed_ds.close()
+
+    # Replace the original file
+    if delete_orig:
+        os.remove(src_path)
+        os.rename(dst_path, src_path)
+

 # Test cases
 if __name__ == "__main__":
-    #
-
-    file = r"F:\roms_rst.nc"
+    # Example file path; adjust to your environment
+    file = "dataset_test.nc"
     ds = xr.open_dataset(file)
-    outfile =
+    outfile = "dataset_test_compressed.nc"
     save_to_nc(outfile, ds)
     ds.close()
-
+
     # dataarray
     data = np.random.rand(4, 3, 2)
     coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
     varname = "test_var"
     data = xr.DataArray(data, dims=("x", "y", "z"), coords=coords, name=varname)
-    outfile =
+    outfile = "test_dataarray.nc"
     save_to_nc(outfile, data)
-
-    # numpy array
-
-
-    varname =
-    outfile = r"F:\test_numpy.nc"
-    save_to_nc(outfile, data, varname=varname, coords=coords)
-    # --------------------------------
+
+    # numpy array with custom missing value
+    coords = {"dim0": np.arange(5)}
+    data = np.array([1, 2, -999, 4, np.nan])
+    save_to_nc("test_numpy_missing.nc", data, varname="data", coords=coords, missing_value=-999)
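Reading such a file back, xarray's default CF decoding (mask_and_scale) undoes the packing transparently; a quick check (a sketch, with the file name taken from the test block above):

```python
import xarray as xr

# Decoded view: integers are unpacked to floats via scale_factor/add_offset,
# and entries equal to _FillValue come back as NaN.
ds = xr.open_dataset("dataset_test_compressed.nc")

# Raw view: the stored integers plus the packing attributes.
raw = xr.open_dataset("dataset_test_compressed.nc", decode_cf=False)
for name, var in raw.data_vars.items():
    print(name, var.dtype, var.attrs.get("scale_factor"), var.attrs.get("add_offset"))
```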
oafuncs/oa_cmap.py
CHANGED
@@ -8,7 +8,9 @@ __all__ = ["show", "to_color", "create", "get"]


 # ** Visualize a cmap with a filled-color plot (adapted from the matplotlib docs)
-def show(colormaps: Union[str, mpl.colors.Colormap, List[Union[str, mpl.colors.Colormap]]]) -> None:
+def show(
+    colormaps: Union[str, mpl.colors.Colormap, List[Union[str, mpl.colors.Colormap]]],
+) -> None:
     """Helper function to plot data with associated colormap.

     This function creates a visualization of one or more colormaps by applying them
@@ -97,7 +99,14 @@ def to_color(colormap_name: str, num_colors: int = 256) -> List[tuple]:


 # ** Build a custom multi-color cmap, optionally with position stops
-def create(color_list: Optional[List[Union[str, tuple]]] = None, rgb_file: Optional[str] = None, color_positions: Optional[List[float]] = None, below_range_color: Optional[Union[str, tuple]] = None, above_range_color: Optional[Union[str, tuple]] = None, value_delimiter: str = ",") -> mpl.colors.Colormap:
+def create(
+    color_list: Optional[List[Union[str, tuple]]] = None,
+    rgb_file: Optional[str] = None,
+    color_positions: Optional[List[float]] = None,
+    below_range_color: Optional[Union[str, tuple]] = None,
+    above_range_color: Optional[Union[str, tuple]] = None,
+    value_delimiter: str = ",",
+) -> mpl.colors.Colormap:
     """Create a custom colormap from a list of colors or an RGB txt document.

     Args:
@@ -144,7 +153,7 @@ def create(color_list: Optional[List[Union[str, tuple]]] = None, rgb_file: Optio

     if rgb_file:
         try:
-            print(f"Reading RGB data from {rgb_file}...")
+            # print(f"Reading RGB data from {rgb_file}...")

             with open(rgb_file) as fid:
                 data = [line.strip() for line in fid if line.strip() and not line.strip().startswith("#")]
@@ -178,7 +187,7 @@ def create(color_list: Optional[List[Union[str, tuple]]] = None, rgb_file: Optio
             if max_rgb > 2:
                 rgb = rgb / 255.0
             cmap_color = mpl.colors.ListedColormap(rgb, name="my_color")
-            print(f"Successfully created colormap from {rgb_file}")
+            # print(f"Successfully created colormap from {rgb_file}")
         except FileNotFoundError:
             error_msg = f"RGB file not found: {rgb_file}"
             print(error_msg)
@@ -189,15 +198,15 @@ def create(color_list: Optional[List[Union[str, tuple]]] = None, rgb_file: Optio
             cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", color_list)
         else:
             cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", list(zip(color_positions, color_list)))
-        print(f"Successfully created colormap from {len(color_list)} colors")
+        # print(f"Successfully created colormap from {len(color_list)} colors")

     # Set below/above range colors if provided
     if below_range_color is not None:
         cmap_color.set_under(below_range_color)
-        print(f"Set below-range color to {below_range_color}")
+        # print(f"Set below-range color to {below_range_color}")
     if above_range_color is not None:
         cmap_color.set_over(above_range_color)
-        print(f"Set above-range color to {above_range_color}")
+        # print(f"Set above-range color to {above_range_color}")

     return cmap_color

@@ -246,12 +255,12 @@ def get(colormap_name: Optional[str] = None, show_available: bool = False) -> Op
         return None

     if colormap_name in my_cmap_dict:
-        print(f"Using custom colormap: {colormap_name}")
+        # print(f"Using custom colormap: {colormap_name}")
         return create(my_cmap_dict[colormap_name])
     else:
         try:
             cmap = mpl.colormaps.get_cmap(colormap_name)
-            print(f"Using matplotlib colormap: {colormap_name}")
+            # print(f"Using matplotlib colormap: {colormap_name}")
             return cmap
         except ValueError:
             print(f"Warning: Unknown cmap name: {colormap_name}")
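The now-quiet create()/get() helpers are used like this (a usage sketch; the color choices are arbitrary):

```python
import matplotlib.pyplot as plt
import numpy as np
from oafuncs import oa_cmap

# Three-color ramp with explicit stops, plus out-of-range colors.
cmap = oa_cmap.create(
    color_list=["#2c7bb6", "#ffffbf", "#d7191c"],
    color_positions=[0.0, 0.4, 1.0],
    below_range_color="black",
    above_range_color="white",
)

field = np.random.rand(50, 50)
plt.pcolormesh(field, cmap=cmap, vmin=0.1, vmax=0.9)
plt.colorbar(extend="both")  # shows the under/over colors
plt.show()
```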
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -1,18 +1,3 @@
-#!/usr/bin/env python
-# coding=utf-8
-"""
-Author: Liu Kun && 16031215@qq.com
-Date: 2025-04-07 10:51:09
-LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2025-04-07 10:51:09
-FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly copy.py
-Description:
-EditPlatform: vscode
-ComputerInfo: XPS 15 9510
-SystemInfo: Windows 11
-Python Version: 3.12
-"""
-
 import asyncio
 import datetime
 import logging
@@ -23,7 +8,6 @@ import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from threading import Lock
-from oafuncs.oa_tool import pbar

 import httpx
 import matplotlib.pyplot as plt
@@ -38,6 +22,7 @@ from oafuncs.oa_down.user_agent import get_ua
 from oafuncs.oa_file import file_size
 from oafuncs.oa_nc import check as check_nc
 from oafuncs.oa_nc import modify as modify_nc
+from oafuncs.oa_tool import pbar

 logging.getLogger("httpx").setLevel(logging.WARNING)  # Silence httpx INFO logs; show WARNING and above only

@@ -724,7 +709,6 @@ class _HycomDownloader:
                     logging.info(f"{file_name}: {percent}% ({downloaded / 1024:.1f} KB / {total / 1024:.1f} KB)")
                     last_percent = percent

-
             elapsed = datetime.datetime.now() - start
             # logging.info(f"File {file_name} downloaded, Time: {elapsed}")
             logging.info(f"Saving {file_name} ...")
@@ -966,7 +950,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
         print("Downloading a series of files...")
         time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
         # with Progress() as progress:
-
+        # task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
         if num_workers is None or num_workers <= 1:
             for i, time_str in pbar(enumerate(time_list), description=f"{bar_desc}", total=len(time_list), next_line=True):
                 _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
@@ -976,7 +960,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
             futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
             """ for feature in as_completed(futures):
                 _done_callback(feature, progress, task, len(time_list), counter_lock) """
-            for _ in pbar(as_completed(futures),description=f"{bar_desc}", total=len(futures),next_line=True):
+            for _ in pbar(as_completed(futures), description=f"{bar_desc}", total=len(futures), next_line=True):
                 pass
     else:
         print("[bold red]Please ensure the time_s is no more than time_e")
@@ -1174,7 +1158,7 @@ def download(

     count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
     print("[bold #ecdbfe]=" * mark_len)
-    print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}\nNo data: {count_dict['no_data']}")
+    print(f"[bold #ff80ab]Total  : {count_dict['total']}\nSuccess: {count_dict['success']}\nFail   : {count_dict['fail']}\nSkip   : {count_dict['skip']}\nNo data: {count_dict['no_data']}")
     print("[bold #ecdbfe]=" * mark_len)
     if count_dict["fail"] > 0:
         print("[bold #be5528]Please try again to download the failed data later.")
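The pbar-over-as_completed pattern used above generalizes to any batch of futures; a minimal standalone sketch (a dummy task stands in for the downloader):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from oafuncs.oa_tool import pbar

def task(n):
    return n * n  # stand-in for a single-file download

with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(task, n) for n in range(20)]
    # pbar advances once per completed future, mirroring the download loop above
    for _ in pbar(as_completed(futures), description="Downloading", total=len(futures), next_line=True):
        pass
```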
oafuncs/oa_draw.py
CHANGED
@@ -318,7 +318,7 @@ def add_gridlines(axes: plt.Axes, longitude_lines: list[float] = None, latitude_
     if latitude_lines is not None:
         gl.ylocator = mticker.FixedLocator(np.array(latitude_lines))

-    print("[green]Gridlines added successfully.[/green]")
+    # print("[green]Gridlines added successfully.[/green]")
     return axes, gl


@@ -365,7 +365,7 @@ def add_cartopy(axes: plt.Axes, longitude_data: np.ndarray = None, latitude_data
         lat_min, lat_max = np.nanmin(latitude_data), np.nanmax(latitude_data)
         axes.set_extent([lon_min, lon_max, lat_min, lat_max], crs=map_projection)

-    print("[green]Cartopy features added successfully.[/green]")
+    # print("[green]Cartopy features added successfully.[/green]")
     return axes

oafuncs/oa_nc.py
CHANGED
@@ -6,7 +6,8 @@ import numpy as np
 import xarray as xr
 from rich import print

-__all__ = ["save", "merge", "modify", "rename", "check", "convert_longitude", "isel", "draw", "
+__all__ = ["save", "merge", "modify", "rename", "check", "convert_longitude", "isel", "draw", "compress", "unscale"]
+


 def save(
@@ -15,8 +16,10 @@ def save(
     variable_name: Optional[str] = None,
     coordinates: Optional[dict] = None,
     write_mode: str = "w",
+    convert_dtype: str = "int32",
     use_scale_offset: bool = True,
     use_compression: bool = True,
+    preserve_mask_values: bool = True,
 ) -> None:
     """
     Write data to a NetCDF file.
@@ -27,8 +30,10 @@ def save(
         variable_name (Optional[str]): Variable name for the data.
         coordinates (Optional[dict]): Coordinates, where keys are dimension names and values are coordinate data.
         write_mode (str): Write mode, 'w' for write, 'a' for append. Default is 'w'.
+        convert_dtype (str): Data type to convert to. Default is 'int32'.
         use_scale_offset (bool): Whether to use scale_factor and add_offset. Default is True.
         use_compression (bool): Whether to use compression parameters. Default is True.
+        preserve_mask_values (bool): Whether to preserve mask values. Default is True.

     Example:
         >>> save(r'test.nc', data, 'u', {'time': np.linspace(0, 120, 100), 'lev': np.linspace(0, 120, 50)}, 'a')
@@ -38,7 +43,7 @@ def save(
     """
     from ._script.netcdf_write import save_to_nc

-    save_to_nc(file_path, data, variable_name, coordinates, write_mode, use_scale_offset, use_compression)
+    save_to_nc(file_path, data, variable_name, coordinates, write_mode, convert_dtype, use_scale_offset, use_compression, preserve_mask_values)
     print(f"[green]Data successfully saved to {file_path}[/green]")

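The extended save() signature is exercised like this (a sketch; test.nc is a placeholder):

```python
import numpy as np
from oafuncs import oa_nc

data = np.random.rand(100, 50).astype(np.float32)
coords = {"time": np.linspace(0, 120, 100), "lev": np.linspace(0, 120, 50)}

# int16 packing halves the storage relative to int32, at the cost of precision.
oa_nc.save("test.nc", data, "u", coords, "w",
           convert_dtype="int16", use_scale_offset=True,
           use_compression=True, preserve_mask_values=True)
```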
@@ -278,7 +283,7 @@ def draw(
         print("[red]No dataset or file provided.[/red]")


-def compress_netcdf(src_path, dst_path=None):
+def compress(src_path, dst_path=None, convert_dtype='int16'):
     """
     Compress a NetCDF file, packing the data with scale_factor/add_offset.
     If dst_path is omitted, a new file name is generated automatically; the original file is then deleted and the new file renamed to the original name.
@@ -289,7 +294,7 @@ def compress_netcdf(src_path, dst_path=None):
         dst_path = src_path.replace(".nc", "_compress.nc")

     ds = xr.open_dataset(src_path)
-    save(dst_path, ds)
+    save(dst_path, ds, convert_dtype=convert_dtype, use_scale_offset=True, use_compression=True)
     ds.close()

     if delete_orig:
@@ -298,26 +303,88 @@ def compress_netcdf(src_path, dst_path=None):
         pass


-def
+def unscale(src_path, dst_path=None, compression_level=4):
     """Decode a NetCDF file and strip scale_factor/add_offset, writing out the real values.
+    Compression is kept, but without scale factor and offset, to control the file size.
     If dst_path is omitted, a new file name is generated automatically; the original file is then deleted and the new file renamed to the original name.
+
+    Args:
+        src_path: source file path
+        dst_path: destination file path; None replaces the original file
+        compression_level: compression level (1-9); higher compresses more but is slower
     """
     # Decide whether the original file is to be replaced
     delete_orig = dst_path is None
     if delete_orig:
         dst_path = src_path.replace(".nc", "_unpacked.nc")

+    # Get the size of the original file
+    orig_size = os.path.getsize(src_path) / (1024 * 1024)  # MB
+
+    # First open in raw mode to see which variables use scale_factor/add_offset
+    with xr.open_dataset(src_path, decode_cf=False) as ds_raw:
+        has_scaling = []
+        for var in ds_raw.data_vars:
+            if "scale_factor" in ds_raw[var].attrs or "add_offset" in ds_raw[var].attrs:
+                has_scaling.append(var)
+
+    print(f"[yellow]File: {src_path} (original size: {orig_size:.2f} MB)[/yellow]")
+    if has_scaling:
+        print(f"[yellow]Found {len(has_scaling)} variables using scale factors: {', '.join(has_scaling)}[/yellow]")
+    else:
+        print("[yellow]No variable uses scale factors; unpacking may be unnecessary[/yellow]")
+
+    # Open with CF decoding
     ds = xr.open_dataset(src_path, decode_cf=True)
+    encoding = {}
+
     for var in ds.data_vars:
+        # Preserve the original _FillValue
+        fill_value = None
+        if "_FillValue" in ds[var].attrs:
+            fill_value = ds[var].attrs["_FillValue"]
+        elif "_FillValue" in ds[var].encoding:
+            fill_value = ds[var].encoding["_FillValue"]
+
+        # Clear the scale_factor and add_offset attributes
         ds[var].attrs.pop("scale_factor", None)
         ds[var].attrs.pop("add_offset", None)
         ds[var].encoding.clear()
-
+
+        # Only process numeric variables
+        if np.issubdtype(ds[var].dtype, np.number):
+            # Force float32 to keep float64 from blowing up the file size
+            if np.issubdtype(ds[var].dtype, np.floating) and ds[var].dtype != np.float32:
+                ds[var] = ds[var].astype(np.float32)
+
+            # Set compression parameters, but no scale_factor/add_offset
+            encoding[var] = {"zlib": True, "complevel": compression_level, "dtype": ds[var].dtype}
+            # Restore the _FillValue
+            if fill_value is not None:
+                encoding[var]["_FillValue"] = fill_value
+
+    # Save, passing encoding so the data are compressed but not scaled
     ds.to_netcdf(dst_path, encoding=encoding)
     ds.close()

+    # Report the size change
+    if os.path.exists(dst_path):
+        new_size = os.path.getsize(dst_path) / (1024 * 1024)  # MB
+        size_change = new_size - orig_size
+        change_percent = (size_change / orig_size) * 100
+
+        color = "green" if size_change <= 0 else "red"
+        print(f"[{color}]Unpacked file size: {new_size:.2f} MB ({change_percent:+.1f}%)[/{color}]")
+
+        if size_change > orig_size * 0.5 and new_size > 100:  # file grew more than 50% and exceeds 100 MB
+            print(f"[red]Warning: the file grew significantly! Consider raising the compression level (currently {compression_level})[/red]")
+
     if delete_orig:
         os.remove(src_path)
         os.rename(dst_path, src_path)
+        print(f"[green]Replaced the original file: {src_path}[/green]")
+    else:
+        print(f"[green]Saved to: {dst_path}[/green]")


 if __name__ == "__main__":
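A typical round trip with the two helpers (a sketch; ocean.nc is a placeholder):

```python
from oafuncs import oa_nc

# Pack floats into int16 with scale/offset, then verify and shrink in place.
oa_nc.compress("ocean.nc")                    # replaces ocean.nc with a packed copy
oa_nc.compress("ocean.nc", "ocean_small.nc")  # or keep the original

# Reverse: write real float32 values, keeping only zlib compression.
oa_nc.unscale("ocean_small.nc", "ocean_real.nc", compression_level=6)
```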
oafuncs/oa_tool.py
CHANGED
@@ -135,7 +135,7 @@ def email(title: str = "Title", content: Optional[str] = None, send_to: str = "1

 def pbar(
     iterable: Iterable = range(100),
-    description: str = "Working
+    description: str = "Working",
     total: Optional[float] = None,
     completed: float = 0,
     color: Any = "None",
{oafuncs-0.0.98.20.dist-info → oafuncs-0.0.98.22.dist-info}/RECORD
CHANGED
@@ -1,29 +1,29 @@
 oafuncs/__init__.py,sha256=T_-VtnWWllV3Q91twT5Yt2sUapeA051QbPNnBxmg9nw,1456
-oafuncs/oa_cmap.py,sha256=
+oafuncs/oa_cmap.py,sha256=pUFAGzbIg0WLxObBP2t_--ZIg00Dxdojx0y7OjTeqEo,11551
 oafuncs/oa_data.py,sha256=Aat9ktxxRGevaqQya3IJWfXeoEs-FCXGUcNE2pKnzfU,10931
 oafuncs/oa_date.py,sha256=WhM6cyD4G3IeghjLTHhAMtlvJbA7kwQG2sHnxdTgyso,6303
-oafuncs/oa_draw.py,sha256=
+oafuncs/oa_draw.py,sha256=IaBGDx-EOxyMM2IuJ4zLZt6ruHHV5qFStPItmUOXoWk,17635
 oafuncs/oa_file.py,sha256=j9gXJgPOJsliu4IOUc4bc-luW4yBvQyNCEmMyDVjUwQ,16404
 oafuncs/oa_help.py,sha256=_4AZgRDq5Or0vauNvq5IDDHIBoBfdOQtzak-mG1wwAw,4537
-oafuncs/oa_nc.py,sha256=
+oafuncs/oa_nc.py,sha256=UUXnBg2cO5XiJ8w0jNqCZJg83FVKqxlEHxOJG5o08Z8,15201
 oafuncs/oa_python.py,sha256=NkopwkYFGSEuVljnTBvXCl6o2CeyRNBqRXSsUl3euEE,5192
-oafuncs/oa_tool.py,sha256=
+oafuncs/oa_tool.py,sha256=QBjJh3pf54yXVuOmu97rW6Tsr6uNMyZ5KqZbR4VQFTc,8628
 oafuncs/_data/hycom.png,sha256=MadKs6Gyj5n9-TOu7L4atQfTXtF9dvN9w-tdU9IfygI,10945710
 oafuncs/_data/oafuncs.png,sha256=o3VD7wm-kwDea5E98JqxXl04_78cBX7VcdUt7uQXGiU,3679898
 oafuncs/_script/cprogressbar.py,sha256=UIgGcLFs-6IgWlITuBLaQqrpt4OAK3Mst5RlCiNfZdQ,15772
 oafuncs/_script/data_interp.py,sha256=EiZbt6n5BEaRKcng88UgX7TFPhKE6TLVZniS01awXjg,5146
 oafuncs/_script/data_interp_geo.py,sha256=ZRFb3fKRiYQViZNHd19eW20C9i38BsiIU8w0fG5mbqM,7789
 oafuncs/_script/email.py,sha256=lL4HGKrr524-g0xLlgs-4u7x4-u7DtgNoD9AL8XJKj4,3058
-oafuncs/_script/netcdf_merge.py,sha256=
+oafuncs/_script/netcdf_merge.py,sha256=tM9ePqLiEsE7eIsNM5XjEYeXwxjYOdNz5ejnEuI7xKw,6066
 oafuncs/_script/netcdf_modify.py,sha256=sGRUYNhfGgf9JV70rnBzw3bzuTRSXzBTL_RMDnDPeLQ,4552
-oafuncs/_script/netcdf_write.py,sha256=
+oafuncs/_script/netcdf_write.py,sha256=GvyUyUhzMonzSp3y4pT8ZAfbQrsh5J3dLnmINYJKhuE,21422
 oafuncs/_script/parallel.py,sha256=07-BJVHxXJNlrOrhrSGt7qCZiKWq6dBvNDBA1AANYnI,8861
 oafuncs/_script/parallel_test.py,sha256=0GBqZOX7IaCOKF2t1y8N8YYu53GJ33OkfsWgpvZNqM4,372
 oafuncs/_script/plot_dataset.py,sha256=zkSEnO_-biyagorwWXPoihts_cwuvripzEt-l9bHJ2E,13989
 oafuncs/_script/replace_file_content.py,sha256=eCFZjnZcwyRvy6b4mmIfBna-kylSZTyJRfgXd6DdCjk,5982
 oafuncs/oa_down/User_Agent-list.txt,sha256=pHaMlElMvZ8TG4vf4BqkZYKqe0JIGkr4kCN0lM1Y9FQ,514295
 oafuncs/oa_down/__init__.py,sha256=kRX5eTUCbAiz3zTaQM1501paOYS_3fizDN4Pa0mtNUA,585
-oafuncs/oa_down/hycom_3hourly.py,sha256=
+oafuncs/oa_down/hycom_3hourly.py,sha256=R5fKfIcpNRuaQgPiov_hJRd8voWgAHVLWifAMzR6RQI,55075
 oafuncs/oa_down/hycom_3hourly_proxy.py,sha256=1eaoJGI_m-7w4ZZ3n7NGxkZaeFdsm0d3U-hyw8RFNbc,54563
 oafuncs/oa_down/idm.py,sha256=4z5IvgfTyIKEI1kOtqXZwN7Jnfjwp6qDBOIoVyOLp0I,1823
 oafuncs/oa_down/literature.py,sha256=2bF9gSKQbzcci9LcKE81j8JEjIJwON7jbwQB3gDDA3E,11331
@@ -39,8 +39,8 @@ oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,5
 oafuncs/oa_sign/meteorological.py,sha256=8091SHo2L8kl4dCFmmSH5NGVHDku5i5lSiLEG5DLnOQ,6489
 oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
 oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
-oafuncs-0.0.98.20.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
-oafuncs-0.0.98.20.dist-info/METADATA,sha256=
-oafuncs-0.0.98.20.dist-info/WHEEL,sha256=
-oafuncs-0.0.98.20.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
-oafuncs-0.0.98.20.dist-info/RECORD,,
+oafuncs-0.0.98.22.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
+oafuncs-0.0.98.22.dist-info/METADATA,sha256=ctJ9aAoY3RztAP6gD2STCFB0ZZaCbXQV8SufCLMGkbM,4273
+oafuncs-0.0.98.22.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+oafuncs-0.0.98.22.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
+oafuncs-0.0.98.22.dist-info/RECORD,,
File without changes
|
File without changes
|