oafuncs 0.0.98.41__tar.gz → 0.0.98.42__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oafuncs-0.0.98.41/oafuncs.egg-info → oafuncs-0.0.98.42}/PKG-INFO +1 -1
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/netcdf_write.py +24 -77
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_data.py +80 -2
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42/oafuncs.egg-info}/PKG-INFO +1 -1
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/setup.py +1 -1
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/LICENSE.txt +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/MANIFEST.in +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/README.md +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/__init__.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_data/hycom.png +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_data/oafuncs.png +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/cprogressbar.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/data_interp.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/email.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/netcdf_merge.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/netcdf_modify.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/parallel.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/parallel_bak.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/plot_dataset.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/_script/replace_file_content.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_cmap.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_date.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/User_Agent-list.txt +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/__init__.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/hycom_3hourly.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/idm.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/literature.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/read_proxy.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/test_ua.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_down/user_agent.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_draw.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_file.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_help.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_model/__init__.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_model/roms/__init__.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_model/roms/test.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_model/wrf/__init__.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_model/wrf/little_r.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_nc.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_python.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_sign/__init__.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_sign/meteorological.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_sign/ocean.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_sign/scientific.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs/oa_tool.py +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs.egg-info/SOURCES.txt +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs.egg-info/dependency_links.txt +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs.egg-info/requires.txt +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/oafuncs.egg-info/top_level.txt +0 -0
- {oafuncs-0.0.98.41 → oafuncs-0.0.98.42}/setup.cfg +0 -0
@@ -81,9 +81,13 @@ def _calculate_scale_and_offset(data, dtype="int32"):
|
|
81
81
|
if dtype == "int32":
|
82
82
|
n = 32
|
83
83
|
fill_value = np.iinfo(np.int32).min # -2147483648
|
84
|
+
max_packed_value = np.iinfo(np.int32).max # 2147483647
|
85
|
+
min_packed_value = np.iinfo(np.int32).min + 1 # -2147483647 (保留最小值作为填充值)
|
84
86
|
elif dtype == "int16":
|
85
87
|
n = 16
|
86
88
|
fill_value = np.iinfo(np.int16).min # -32768
|
89
|
+
max_packed_value = np.iinfo(np.int16).max # 32767
|
90
|
+
min_packed_value = np.iinfo(np.int16).min + 1 # -32767 (保留最小值作为填充值)
|
87
91
|
else:
|
88
92
|
raise ValueError("Unsupported dtype. Supported types are 'int16' and 'int32'.")
|
89
93
|
|
@@ -93,8 +97,15 @@ def _calculate_scale_and_offset(data, dtype="int32"):
|
|
93
97
|
valid_mask &= ~data.mask
|
94
98
|
|
95
99
|
if np.any(valid_mask):
|
96
|
-
data_min = np.min(data[valid_mask])
|
97
|
-
data_max = np.max(data[valid_mask])
|
100
|
+
data_min = np.min(data[valid_mask])
|
101
|
+
data_max = np.max(data[valid_mask])
|
102
|
+
|
103
|
+
# 添加一个小的缓冲以确保所有值都在范围内,但不要过大
|
104
|
+
data_range = data_max - data_min
|
105
|
+
if data_range > 0:
|
106
|
+
buffer = data_range * 1e-6 # 使用相对缓冲而不是绝对值1
|
107
|
+
data_min -= buffer
|
108
|
+
data_max += buffer
|
98
109
|
else:
|
99
110
|
data_min, data_max = 0, 1
|
100
111
|
|
@@ -103,7 +114,9 @@ def _calculate_scale_and_offset(data, dtype="int32"):
|
|
103
114
|
scale_factor = 1.0
|
104
115
|
add_offset = data_min
|
105
116
|
else:
|
106
|
-
scale_factor
|
117
|
+
# 使用可用的打包值范围计算scale_factor
|
118
|
+
packed_range = max_packed_value - min_packed_value
|
119
|
+
scale_factor = (data_max - data_min) / packed_range
|
107
120
|
add_offset = (data_max + data_min) / 2.0
|
108
121
|
return scale_factor, add_offset
|
109
122
|
|
@@ -141,14 +154,18 @@ def _data_to_scale_offset(data, scale, offset, dtype='int32'):
|
|
141
154
|
# 只有掩码标记的区域视为无效
|
142
155
|
valid_mask &= ~data.mask
|
143
156
|
|
144
|
-
|
157
|
+
# 初始化结果数组为填充值
|
158
|
+
result = np.full_like(data, fill_value, dtype=np_dtype)
|
159
|
+
|
145
160
|
if np.any(valid_mask):
|
146
|
-
#
|
161
|
+
# 标准的scale/offset转换公式:packed_value = (unpacked_value - add_offset) / scale_factor
|
147
162
|
scaled = (data[valid_mask] - offset) / scale
|
163
|
+
# 四舍五入到最近的整数
|
148
164
|
scaled = np.round(scaled).astype(np_dtype)
|
149
|
-
# clip
|
150
|
-
scaled = np.clip(scaled, clip_min, clip_max) #
|
165
|
+
# clip到整数范围,保留最大范围供转换
|
166
|
+
scaled = np.clip(scaled, clip_min, clip_max) # 不使用最小值,保留做 _FillValue
|
151
167
|
result[valid_mask] = scaled
|
168
|
+
|
152
169
|
return result
|
153
170
|
|
154
171
|
|
@@ -374,76 +391,6 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
|
|
374
391
|
raise RuntimeError(f"netCDF4 保存失败: {str(e)}") from e
|
375
392
|
|
376
393
|
|
377
|
-
def _compress_netcdf(src_path, dst_path=None, tolerance=1e-10, preserve_mask_values=True):
|
378
|
-
"""
|
379
|
-
压缩 NetCDF 文件,使用 scale_factor/add_offset 压缩数据。
|
380
|
-
若 dst_path 省略,则自动生成新文件名,写出后删除原文件并将新文件改回原名。
|
381
|
-
压缩后验证数据是否失真。
|
382
|
-
|
383
|
-
参数:
|
384
|
-
- src_path: 原始 NetCDF 文件路径
|
385
|
-
- dst_path: 压缩后的文件路径(可选)
|
386
|
-
- tolerance: 数据验证的允许误差范围(默认 1e-10)
|
387
|
-
- preserve_mask_values: 是否保留掩码区域的原始值(True)或将其替换为缺省值(False)
|
388
|
-
"""
|
389
|
-
# 判断是否要替换原文件
|
390
|
-
delete_orig = dst_path is None
|
391
|
-
if delete_orig:
|
392
|
-
dst_path = src_path.replace(".nc", "_compress.nc")
|
393
|
-
# 打开原始文件并保存压缩文件
|
394
|
-
ds = xr.open_dataset(src_path)
|
395
|
-
save_to_nc(dst_path, ds, convert_dtype='int32',scale_offset_switch=True, compile_switch=True, preserve_mask_values=preserve_mask_values)
|
396
|
-
ds.close()
|
397
|
-
|
398
|
-
# 验证压缩后的数据是否失真
|
399
|
-
original_ds = xr.open_dataset(src_path)
|
400
|
-
compressed_ds = xr.open_dataset(dst_path)
|
401
|
-
# 更详细地验证数据
|
402
|
-
for var in original_ds.data_vars:
|
403
|
-
original_data = original_ds[var].values
|
404
|
-
compressed_data = compressed_ds[var].values
|
405
|
-
# 跳过非数值类型变量
|
406
|
-
if not np.issubdtype(original_data.dtype, np.number):
|
407
|
-
continue
|
408
|
-
# 获取掩码(如果存在)
|
409
|
-
original_mask = None
|
410
|
-
if hasattr(original_data, "mask") and np.ma.is_masked(original_data): # 修正:确保是有效的掩码数组
|
411
|
-
original_mask = original_data.mask.copy()
|
412
|
-
# 检查有效数据是否在允许误差范围内
|
413
|
-
valid_mask = np.isfinite(original_data)
|
414
|
-
if original_mask is not None:
|
415
|
-
valid_mask &= ~original_mask
|
416
|
-
if np.any(valid_mask):
|
417
|
-
if np.issubdtype(original_data.dtype, np.floating):
|
418
|
-
diff = np.abs(original_data[valid_mask] - compressed_data[valid_mask])
|
419
|
-
max_diff = np.max(diff)
|
420
|
-
if max_diff > tolerance:
|
421
|
-
print(f"警告: 变量 {var} 的压缩误差 {max_diff} 超出容许范围 {tolerance}")
|
422
|
-
if max_diff > tolerance * 10: # 严重偏差时抛出错误
|
423
|
-
raise ValueError(f"变量 {var} 的数据在压缩后严重失真 (max_diff={max_diff})")
|
424
|
-
elif np.issubdtype(original_data.dtype, np.integer):
|
425
|
-
# 整数类型应该完全相等
|
426
|
-
if not np.array_equal(original_data[valid_mask], compressed_data[valid_mask]):
|
427
|
-
raise ValueError(f"变量 {var} 的整数数据在压缩后不一致")
|
428
|
-
# 如果需要保留掩码区域值,检查掩码区域的值
|
429
|
-
if preserve_mask_values and original_mask is not None and np.any(original_mask):
|
430
|
-
# 确保掩码区域的原始值被正确保留
|
431
|
-
# 修正:掩码数组可能存在数据类型不匹配问题,添加安全检查
|
432
|
-
try:
|
433
|
-
mask_diff = np.abs(original_data[original_mask] - compressed_data[original_mask])
|
434
|
-
if np.any(mask_diff > tolerance):
|
435
|
-
print(f"警告: 变量 {var} 的掩码区域数据在压缩后发生变化")
|
436
|
-
except Exception as e:
|
437
|
-
print(f"警告: 变量 {var} 的掩码区域数据比较失败: {str(e)}")
|
438
|
-
original_ds.close()
|
439
|
-
compressed_ds.close()
|
440
|
-
|
441
|
-
# 替换原文件
|
442
|
-
if delete_orig:
|
443
|
-
os.remove(src_path)
|
444
|
-
os.rename(dst_path, src_path)
|
445
|
-
|
446
|
-
|
447
394
|
# 测试用例
|
448
395
|
if __name__ == "__main__":
|
449
396
|
# 示例文件路径,需根据实际情况修改
|
@@ -1,11 +1,11 @@
|
|
1
|
-
from typing import Any, List, Union
|
1
|
+
from typing import Any, List, Union, Literal
|
2
2
|
|
3
3
|
import numpy as np
|
4
4
|
import xarray as xr
|
5
5
|
from rich import print
|
6
6
|
|
7
7
|
|
8
|
-
__all__ = ["interp_along_dim", "interp_2d", "ensure_list", "mask_shapefile"]
|
8
|
+
__all__ = ["interp_along_dim", "interp_2d", "ensure_list", "mask_shapefile", "mask_land_ocean"]
|
9
9
|
|
10
10
|
|
11
11
|
def ensure_list(input_value: Any) -> List[str]:
|
@@ -188,5 +188,83 @@ def mask_shapefile(
|
|
188
188
|
return None
|
189
189
|
|
190
190
|
|
191
|
+
|
192
|
+
def _normalize_lon(lon: np.ndarray) -> np.ndarray:
|
193
|
+
"""将经度转换到 [-180, 180)。"""
|
194
|
+
lon = np.asarray(lon, dtype=float)
|
195
|
+
return np.where(lon >= 180, lon - 360, lon)
|
196
|
+
|
197
|
+
|
198
|
+
def _land_sea_mask(
    lon: np.ndarray,
    lat: np.ndarray,
    keep: Literal["land", "ocean"],
) -> np.ndarray:
    """Build a boolean keep-mask from 1-D or 2-D longitude/latitude arrays.

    In the returned array ``True`` marks a position to *keep* and ``False``
    marks a position to be masked out.

    Parameters
    ----------
    lon, lat : array_like
        Either both 1-D (they are expanded with ``np.meshgrid``) or both 2-D
        (used as given). Longitudes are passed through ``_normalize_lon``
        before the lookup.
    keep : {'land', 'ocean'}
        Which surface type the mask should retain.

    Raises
    ------
    ValueError
        If ``lon``/``lat`` are not both 1-D or both 2-D, or if ``keep`` is
        neither ``'land'`` nor ``'ocean'``.
    """
    # Imported lazily so the dependency is only needed when masking is used.
    from global_land_mask import globe

    lon_arr = _normalize_lon(lon)
    lat_arr = np.asarray(lat, dtype=float)

    # Dispatch on the dimensionality of both coordinate arrays at once.
    dims = (lon_arr.ndim, lat_arr.ndim)
    if dims == (1, 1):
        lon_grid, lat_grid = np.meshgrid(lon_arr, lat_arr)
    elif dims == (2, 2):
        lon_grid, lat_grid = lon_arr, lat_arr
    else:
        raise ValueError("经纬度必须是同维度的 1-D 或 2-D 数组")

    ocean = globe.is_ocean(lat_grid, lon_grid)

    if keep == "land":
        return ~ocean
    if keep == "ocean":
        return ocean
    raise ValueError("keep 只能是 'land' 或 'ocean'")
|
230
|
+
|
231
|
+
|
232
|
+
def mask_land_ocean(
    data: xr.DataArray | xr.Dataset,
    lon: np.ndarray,
    lat: np.ndarray,
    *,  # everything after this is keyword-only
    keep: Literal["land", "ocean"] = "land",
) -> xr.DataArray | xr.Dataset:
    """Mask an xarray object by the land/ocean distribution.

    Parameters
    ----------
    data : xr.DataArray or xr.Dataset
        Object with at least 'lat' and 'lon' dimensions.
    lon : array_like
        Longitudes, 1-D or 2-D.
    lat : array_like
        Latitudes, 1-D or 2-D (same dimensionality as ``lon``).
    keep : {'land', 'ocean'}, optional
        Which part to retain; defaults to 'land'.

    Returns
    -------
    xr.DataArray or xr.Dataset
        ``data`` with every position that is not kept replaced by the
        default fill of ``.where`` (NaN for floating-point data).

    Raises
    ------
    ValueError
        Propagated from ``_land_sea_mask`` for unsupported coordinate
        dimensionality or an invalid ``keep`` value.
    """
    # The mask is labelled with the dimension names 'lat' and 'lon' so that
    # xarray can broadcast it against any extra dimensions of ``data``.
    keep_mask = xr.DataArray(_land_sea_mask(lon, lat, keep), dims=("lat", "lon"))

    # Call ``.where`` on the labelled object itself. Routing this through
    # ``xr.apply_ufunc`` hands the callable bare arrays, which have no
    # ``.where`` method, so that form fails with AttributeError.
    # NOTE(review): ``.where`` is expected to keep attrs and stay lazy for
    # dask-backed data; confirm against the xarray version in use.
    return data.where(keep_mask)
|
267
|
+
|
268
|
+
|
191
269
|
if __name__ == "__main__":
|
192
270
|
pass
|
@@ -18,7 +18,7 @@ URL = "https://github.com/Industry-Pays/OAFuncs"
|
|
18
18
|
EMAIL = "liukun0312@stu.ouc.edu.cn"
|
19
19
|
AUTHOR = "Kun Liu"
|
20
20
|
REQUIRES_PYTHON = ">=3.10.0" # 2025/03/13
|
21
|
-
VERSION = "0.0.98.41"
|
21
|
+
VERSION = "0.0.98.42"
|
22
22
|
|
23
23
|
# What packages are required for this module to be executed?
|
24
24
|
REQUIRED = [
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|