oafuncs 0.0.98.43__py3-none-any.whl → 0.0.98.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
oafuncs/__init__.py CHANGED
@@ -40,4 +40,6 @@ from .oa_tool import *
40
40
  # from ._script import *
41
41
  # ------------------- 2025-03-16 15:56:01 -------------------
42
42
  from .oa_date import *
43
- # ------------------- 2025-03-27 16:56:57 -------------------
43
+ # ------------------- 2025-03-27 16:56:57 -------------------
44
+ from .oa_geo import *
45
+ # ------------------- 2025-09-04 14:08:26 -------------------
@@ -1,55 +1,28 @@
1
1
  import os
2
2
  import warnings
3
-
4
- import netCDF4 as nc
5
3
  import numpy as np
6
4
  import xarray as xr
5
+ import netCDF4 as nc
7
6
 
8
7
  warnings.filterwarnings("ignore", category=RuntimeWarning)
9
8
 
10
-
11
-
12
- def _nan_to_fillvalue(ncfile,set_fill_value):
9
+ def _get_dtype_info(dtype):
13
10
  """
14
- NetCDF 文件中所有变量的 NaN 和掩码值替换为其 _FillValue 属性(若无则自动添加 _FillValue=-32767 并替换)。
15
- 同时处理掩码数组中的无效值。
16
- 仅对数值型变量(浮点型、整型)生效。
11
+ 根据输入的 dtype 返回其 numpy_type, clip_min, clip_max。
12
+ 支持 int8, int16, int32, int64 四种整数类型。
13
+ 简化处理:所有特殊值先统一为NaN,转换为整型时再由调用方写为填充值(该类型的最小值)。
14
+ 本函数返回完整的数据类型范围;为填充值保留最小值的工作由调用方完成。
17
15
  """
18
- with nc.Dataset(ncfile, "r+") as ds:
19
- for var_name in ds.variables:
20
- var = ds.variables[var_name]
21
- # 只处理数值类型变量 (f:浮点型, i:有符号整型, u:无符号整型)
22
- if var.dtype.kind not in ["f", "i", "u"]:
23
- continue
24
-
25
- # 读取数据
26
- arr = var[:]
27
-
28
- # 确定填充值
29
- if "_FillValue" in var.ncattrs():
30
- fill_value = var.getncattr("_FillValue")
31
- elif hasattr(var, "missing_value"):
32
- fill_value = var.getncattr("missing_value")
33
- else:
34
- fill_value = set_fill_value
35
- try:
36
- var.setncattr("_FillValue", fill_value)
37
- except Exception:
38
- # 某些变量可能不允许动态添加 _FillValue
39
- continue
40
-
41
- # 处理掩码数组
42
- if hasattr(arr, "mask"):
43
- # 如果是掩码数组,将掩码位置的值设为 fill_value
44
- if np.any(arr.mask):
45
- arr = np.where(arr.mask, fill_value, arr.data if hasattr(arr, "data") else arr)
46
-
47
- # 处理剩余 NaN 和无穷值
48
- if arr.dtype.kind in ["f", "i", "u"] and np.any(~np.isfinite(arr)):
49
- arr = np.nan_to_num(arr, nan=fill_value, posinf=fill_value, neginf=fill_value)
50
-
51
- # 写回变量
52
- var[:] = arr
16
+ dtype_map = {
17
+ "int8": (np.int8, np.iinfo(np.int8).min, np.iinfo(np.int8).max),
18
+ "int16": (np.int16, np.iinfo(np.int16).min, np.iinfo(np.int16).max),
19
+ "int32": (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max),
20
+ "int64": (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max),
21
+ }
22
+ if dtype not in dtype_map:
23
+ raise ValueError(f"Unsupported dtype: {dtype}. Supported types are 'int8', 'int16', 'int32', and 'int64'.")
24
+
25
+ return dtype_map[dtype]
53
26
 
54
27
 
55
28
  def _numpy_to_nc_type(numpy_type):
@@ -72,92 +45,89 @@ def _numpy_to_nc_type(numpy_type):
72
45
 
73
46
  def _calculate_scale_and_offset(data, dtype="int32"):
74
47
  """
75
- 只对有效数据(非NaN、非填充值、非自定义缺失值)计算scale_factor和add_offset。
76
- 使用 int32 类型,n=32
48
+ 只对有效数据(非NaN、非无穷值、非自定义缺失值)计算scale_factor和add_offset。
49
+ 为填充值保留最小值位置,有效数据范围为 [clip_min+1, clip_max]。
77
50
  """
78
51
  if not isinstance(data, np.ndarray):
79
52
  raise ValueError("Input data must be a NumPy array.")
80
-
81
- if dtype == "int32":
82
- n = 32
83
- fill_value = np.iinfo(np.int32).min # -2147483648
84
- elif dtype == "int16":
85
- n = 16
86
- fill_value = np.iinfo(np.int16).min # -32768
87
- else:
88
- raise ValueError("Unsupported dtype. Supported types are 'int16' and 'int32'.")
89
53
 
90
- # 有效掩码:非NaN、非inf、非fill_value
91
- valid_mask = np.isfinite(data) & (data != fill_value)
54
+ np_dtype, clip_min, clip_max = _get_dtype_info(dtype)
55
+
56
+ # 创建有效数据掩码,只排除NaN和无穷值
57
+ valid_mask = np.isfinite(data)
92
58
  if hasattr(data, "mask") and np.ma.is_masked(data):
93
59
  valid_mask &= ~data.mask
94
60
 
95
- if np.any(valid_mask):
96
- data_min = np.min(data[valid_mask])-1
97
- data_max = np.max(data[valid_mask])+1
98
- else:
99
- data_min, data_max = 0, 1
61
+ # 如果没有有效数据,返回默认值
62
+ if not np.any(valid_mask):
63
+ return 1.0, 0.0
64
+
65
+ # 基于有效数据计算最小值和最大值
66
+ data_min = np.min(data[valid_mask])
67
+ data_max = np.max(data[valid_mask])
100
68
 
101
- # 防止scale为0,且保证scale/offset不会影响缺省值
69
+ # 防止 scale_factor 为 0(所有有效值相等时)
102
70
  if data_max == data_min:
103
71
  scale_factor = 1.0
104
72
  add_offset = data_min
105
73
  else:
106
- scale_factor = (data_max - data_min) / (2**n - 2)
74
+ # 使用数据中心点作为offset
107
75
  add_offset = (data_max + data_min) / 2.0
76
+
77
+ # 计算数据范围相对于中心点的最大偏移
78
+ max_deviation = max(abs(data_max - add_offset), abs(data_min - add_offset))
79
+
80
+ # 可用的整数范围(为填充值保留最小值)
81
+ available_range = min(abs(clip_min + 1), abs(clip_max))
82
+ scale_factor = max_deviation / available_range
83
+
108
84
  return scale_factor, add_offset
109
85
 
110
86
 
111
- def _data_to_scale_offset(data, scale, offset, dtype='int32'):
87
+ def _data_to_scale_offset(data, scale, offset, dtype="int32"):
112
88
  """
113
- 只对有效数据做缩放,NaN/inf/填充值直接赋为fill_value。
114
- 掩码区域的值会被保留并进行缩放,除非掩码本身标记为无效。
115
- 使用 int32 类型
89
+ 将数据应用 scale 和 offset 转换,转换为整型以实现压缩。
90
+ NaN、inf 和掩码值将被转换为指定数据类型的最小值作为填充值。
91
+
92
+ 转换公式:scaled_value = (original_value - add_offset) / scale_factor
93
+ 返回整型数组,用最小值表示无效数据
116
94
  """
117
95
  if not isinstance(data, np.ndarray):
118
96
  raise ValueError("Input data must be a NumPy array.")
119
-
120
- if dtype == "int32":
121
- # n = 32
122
- np_dtype = np.int32
123
- fill_value = np.iinfo(np.int32).min # -2147483648
124
- clip_min = np.iinfo(np.int32).min + 1 # -2147483647
125
- clip_max = np.iinfo(np.int32).max # 2147483647
126
- elif dtype == "int16":
127
- # n = 16
128
- np_dtype = np.int16
129
- fill_value = np.iinfo(np.int16).min # -32768
130
- clip_min = np.iinfo(np.int16).min + 1 # -32767
131
- clip_max = np.iinfo(np.int16).max # 32767
132
- else:
133
- raise ValueError("Unsupported dtype. Supported types are 'int16' and 'int32'.")
134
97
 
135
- # 创建掩码,只排除 NaN/inf 和显式的填充值
98
+ np_dtype, clip_min, clip_max = _get_dtype_info(dtype)
99
+ fill_value = clip_min # 使用数据类型的最小值作为填充值
100
+
101
+ # 创建输出数组,初始化为填充值
102
+ result = np.full(data.shape, fill_value, dtype=np_dtype)
103
+
104
+ # 只对有限值进行转换
136
105
  valid_mask = np.isfinite(data)
137
- valid_mask &= data != fill_value
138
-
139
- # 如果数据有掩码属性,还需考虑掩码
106
+
107
+ # 对于掩码数组,排除掩码区域
140
108
  if hasattr(data, "mask") and np.ma.is_masked(data):
141
- # 只有掩码标记的区域视为无效
142
109
  valid_mask &= ~data.mask
143
-
144
- result = data.copy()
110
+
145
111
  if np.any(valid_mask):
146
- # 反向映射时能还原原始值
112
+ # 进行scale/offset转换
147
113
  scaled = (data[valid_mask] - offset) / scale
148
- scaled = np.round(scaled).astype(np_dtype)
149
- # clip到int32范围,保留最大范围供转换
150
- scaled = np.clip(scaled, clip_min, clip_max) # 不使用 -2147483648,保留做 _FillValue
151
- result[valid_mask] = scaled
152
- return result
114
+ # 四舍五入并转换为目标整型,同时确保在有效范围内
115
+ scaled_int = np.round(scaled).astype(np_dtype)
116
+ # 由于我们使用了最小值作为填充值,所以有效数据范围是 [clip_min+1, clip_max]
117
+ scaled_int = np.clip(scaled_int, clip_min + 1, clip_max)
118
+ result[valid_mask] = scaled_int
119
+
120
+ return result, fill_value
153
121
 
154
122
 
155
- def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='int32',scale_offset_switch=True, compile_switch=True, preserve_mask_values=True):
123
+ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='int16', scale_offset_switch=True, compile_switch=True, preserve_mask_values=True, missing_value=None):
156
124
  """
157
125
  保存数据到 NetCDF 文件,支持 xarray 对象(DataArray 或 Dataset)和 numpy 数组。
158
126
 
159
- 仅对数据变量中数值型数据进行压缩转换(利用 scale_factor/add_offset 转换后转为 int32),
127
+ 仅对数据变量中数值型数据进行压缩转换(利用 scale_factor/add_offset 转换后转为指定整数类型),
160
128
  非数值型数据以及所有坐标变量将禁用任何压缩,直接保存原始数据。
129
+
130
+ 简化处理:所有特殊值(missing_value、掩码、无穷值等)统一转换为NaN处理。
161
131
 
162
132
  参数:
163
133
  - file: 保存文件的路径
@@ -165,72 +135,80 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
165
135
  - varname: 变量名(仅适用于传入 numpy 数组或 DataArray 时)
166
136
  - coords: 坐标字典(numpy 数组分支时使用),所有坐标变量均不压缩
167
137
  - mode: "w"(覆盖)或 "a"(追加)
168
- - convert_dtype: 转换为的数值类型("int16" "int32"),默认为 "int32"
138
+ - convert_dtype: 转换为的数值类型("int8", "int16", "int32", "int64"),默认为 "int16";传入不支持的类型时回退为 "int32"
169
139
  - scale_offset_switch: 是否对数值型数据变量进行压缩转换
170
140
  - compile_switch: 是否启用 NetCDF4 的 zlib 压缩(仅针对数值型数据有效)
171
- - missing_value: 自定义缺失值,将被替换为 fill_value
172
141
  - preserve_mask_values: 是否保留掩码区域的原始值(True)或将其替换为缺省值(False)
142
+ - missing_value: 自定义缺失值,将被替换为 NaN
173
143
  """
174
- if convert_dtype not in ["int16", "int32"]:
144
+ if convert_dtype not in ["int8", "int16", "int32", "int64"]:
175
145
  convert_dtype = "int32"
176
146
  nc_dtype = _numpy_to_nc_type(convert_dtype)
177
- # fill_value = np.iinfo(np.convert_dtype).min # -2147483648 或 -32768
178
- # fill_value = np.iinfo(eval('np.' + convert_dtype)).min # -2147483648 或 -32768
179
- np_dtype = getattr(np, convert_dtype) # 更安全的类型获取方式
180
- fill_value = np.iinfo(np_dtype).min
147
+
181
148
  # ----------------------------------------------------------------------------
182
- # 处理 xarray 对象(DataArray 或 Dataset)的情况
149
+ # 处理 xarray 对象(DataArray 或 Dataset)的情况
183
150
  if isinstance(data, (xr.DataArray, xr.Dataset)):
184
151
  encoding = {}
185
-
186
152
  if isinstance(data, xr.DataArray):
187
153
  if data.name is None:
188
154
  data = data.rename("data")
189
155
  varname = data.name if varname is None else varname
190
156
  arr = np.array(data.values)
191
- try:
192
- data_missing_val = data.attrs.get("missing_value")
193
- except AttributeError:
194
- data_missing_val = data.attrs.get("_FillValue", None)
195
- # 只对有效数据计算scale/offset
196
- valid_mask = np.ones(arr.shape, dtype=bool) # 默认所有值都有效
197
- if arr.dtype.kind in ["f", "i", "u"]: # 仅对数值数据应用isfinite
157
+ data_missing_val = data.attrs.get("missing_value", None)
158
+
159
+ valid_mask = np.ones(arr.shape, dtype=bool)
160
+ if arr.dtype.kind in ["f", "i", "u"]:
198
161
  valid_mask = np.isfinite(arr)
199
162
  if data_missing_val is not None:
200
163
  valid_mask &= arr != data_missing_val
201
164
  if hasattr(arr, "mask"):
202
- valid_mask &= ~getattr(arr, "mask", False)
165
+ valid_mask &= ~arr.mask
166
+
203
167
  if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
168
+ # 确保有有效数据用于计算scale/offset
169
+ if not np.any(valid_mask):
170
+ # 如果没有有效数据,不进行压缩转换
171
+ for k in ["_FillValue", "missing_value"]:
172
+ if k in data.attrs:
173
+ del data.attrs[k]
174
+ data.to_dataset(name=varname).to_netcdf(file, mode=mode)
175
+ return
176
+
204
177
  arr_valid = arr[valid_mask]
205
178
  scale, offset = _calculate_scale_and_offset(arr_valid, convert_dtype)
206
- # 写入前处理无效值(只在这里做!)
179
+
180
+ # 创建需要转换的数据副本,但不修改特殊值
207
181
  arr_to_save = arr.copy()
208
- # 处理自定义缺失值
182
+
183
+ # 只处理自定义缺失值,转换为NaN(让后面统一处理)
209
184
  if data_missing_val is not None:
210
- arr_to_save[arr == data_missing_val] = fill_value
211
- # 处理 NaN/inf
212
- arr_to_save[~np.isfinite(arr_to_save)] = fill_value
213
- new_values = _data_to_scale_offset(arr_to_save, scale, offset)
185
+ arr_to_save[arr == data_missing_val] = np.nan
186
+
187
+ # 进行压缩转换(_data_to_scale_offset会正确处理NaN和掩码)
188
+ new_values, fill_value = _data_to_scale_offset(arr_to_save, scale, offset, convert_dtype)
214
189
  new_da = data.copy(data=new_values)
215
- # 移除 _FillValue 和 missing_value 属性
190
+
191
+ # 清除原有的填充值属性,设置新的压缩属性
216
192
  for k in ["_FillValue", "missing_value"]:
217
193
  if k in new_da.attrs:
218
194
  del new_da.attrs[k]
195
+
219
196
  new_da.attrs["scale_factor"] = float(scale)
220
197
  new_da.attrs["add_offset"] = float(offset)
198
+
221
199
  encoding[varname] = {
222
200
  "zlib": compile_switch,
223
201
  "complevel": 4,
224
202
  "dtype": nc_dtype,
225
- # "_FillValue": -2147483648,
203
+ "_FillValue": fill_value, # 使用计算出的填充值
226
204
  }
227
205
  new_da.to_dataset(name=varname).to_netcdf(file, mode=mode, encoding=encoding)
228
206
  else:
207
+ # 对于非数值数据或不压缩的情况,移除填充值属性防止冲突
229
208
  for k in ["_FillValue", "missing_value"]:
230
209
  if k in data.attrs:
231
210
  del data.attrs[k]
232
211
  data.to_dataset(name=varname).to_netcdf(file, mode=mode)
233
- _nan_to_fillvalue(file, fill_value)
234
212
  return
235
213
 
236
214
  else: # Dataset 情况
@@ -239,19 +217,16 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
239
217
  for var in data.data_vars:
240
218
  da = data[var]
241
219
  arr = np.array(da.values)
242
- try:
243
- data_missing_val = da.attrs.get("missing_value")
244
- except AttributeError:
245
- data_missing_val = da.attrs.get("_FillValue", None)
246
- valid_mask = np.ones(arr.shape, dtype=bool) # 默认所有值都有效
247
- if arr.dtype.kind in ["f", "i", "u"]: # 仅对数值数据应用isfinite
220
+ data_missing_val = da.attrs.get("missing_value", None)
221
+
222
+ valid_mask = np.ones(arr.shape, dtype=bool)
223
+ if arr.dtype.kind in ["f", "i", "u"]:
248
224
  valid_mask = np.isfinite(arr)
249
225
  if data_missing_val is not None:
250
226
  valid_mask &= arr != data_missing_val
251
227
  if hasattr(arr, "mask"):
252
- valid_mask &= ~getattr(arr, "mask", False)
228
+ valid_mask &= ~arr.mask
253
229
 
254
- # 创建属性的副本以避免修改原始数据集
255
230
  attrs = da.attrs.copy()
256
231
  for k in ["_FillValue", "missing_value"]:
257
232
  if k in attrs:
@@ -268,23 +243,21 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
268
243
  scale, offset = _calculate_scale_and_offset(arr_valid, convert_dtype)
269
244
  arr_to_save = arr.copy()
270
245
 
271
- # 使用与DataArray相同的逻辑,使用_data_to_scale_offset处理数据
272
- # 处理自定义缺失值
246
+ # 只处理自定义缺失值,转换为NaN(让后面统一处理)
273
247
  if data_missing_val is not None:
274
- arr_to_save[arr == data_missing_val] = fill_value
275
- # 处理 NaN/inf
276
- arr_to_save[~np.isfinite(arr_to_save)] = fill_value
277
- new_values = _data_to_scale_offset(arr_to_save, scale, offset)
248
+ arr_to_save[arr == data_missing_val] = np.nan
249
+
250
+ # 进行压缩转换(_data_to_scale_offset会正确处理NaN和掩码)
251
+ new_values, fill_value = _data_to_scale_offset(arr_to_save, scale, offset, convert_dtype)
278
252
  new_da = xr.DataArray(new_values, dims=da.dims, coords=da.coords, attrs=attrs)
279
253
  new_da.attrs["scale_factor"] = float(scale)
280
254
  new_da.attrs["add_offset"] = float(offset)
281
- # 不设置_FillValue属性,改为使用missing_value
282
- # new_da.attrs["missing_value"] = -2147483648
283
255
  new_vars[var] = new_da
284
256
  encoding[var] = {
285
257
  "zlib": compile_switch,
286
258
  "complevel": 4,
287
259
  "dtype": nc_dtype,
260
+ "_FillValue": fill_value, # 使用计算出的填充值
288
261
  }
289
262
  else:
290
263
  new_vars[var] = xr.DataArray(arr, dims=da.dims, coords=da.coords, attrs=attrs)
@@ -292,7 +265,6 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
292
265
  # 确保坐标变量被正确复制
293
266
  new_ds = xr.Dataset(new_vars, coords=data.coords.copy())
294
267
  new_ds.to_netcdf(file, mode=mode, encoding=encoding if encoding else None)
295
- _nan_to_fillvalue(file, fill_value)
296
268
  return
297
269
 
298
270
  # 处理纯 numpy 数组情况
@@ -303,12 +275,12 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
303
275
  data = np.asarray(data)
304
276
  is_numeric = np.issubdtype(data.dtype, np.number)
305
277
 
278
+ # 处理缺失值
306
279
  if hasattr(data, "mask") and np.ma.is_masked(data):
307
280
  # 处理掩码数组,获取缺失值
308
281
  data = data.data
309
- missing_value = getattr(data, "missing_value", None)
310
- else:
311
- missing_value = None
282
+ if missing_value is None:
283
+ missing_value = getattr(data, "missing_value", None)
312
284
 
313
285
  try:
314
286
  with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
@@ -336,45 +308,60 @@ def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='i
336
308
 
337
309
  # 确保有有效数据
338
310
  if not np.any(valid_mask):
339
- # 如果没有有效数据,不进行压缩,直接保存原始数据类型
311
+ # 如果没有有效数据,不进行压缩,直接保存原始数据类型
340
312
  dtype = _numpy_to_nc_type(data.dtype)
341
313
  var = ncfile.createVariable(varname, dtype, dims, zlib=False)
342
- # 确保没有 NaN
343
- clean_data = np.nan_to_num(data, nan=missing_value if missing_value is not None else fill_value)
314
+ # 确保没有 NaN,直接用0替换
315
+ clean_data = np.nan_to_num(data, nan=0.0)
344
316
  var[:] = clean_data
345
- return
346
-
347
- # 计算 scale 和 offset 仅使用有效区域数据
317
+ return # 无有效数据时到此结束;其后的代码仅使用有效区域数据计算 scale 和 offset
348
318
  arr_valid = arr_to_save[valid_mask]
349
319
  scale, offset = _calculate_scale_and_offset(arr_valid, convert_dtype)
350
320
 
351
- # 执行压缩转换
352
- new_data = _data_to_scale_offset(arr_to_save, scale, offset)
321
+ # 只处理自定义缺失值,转换为NaN
322
+ if missing_value is not None:
323
+ arr_to_save[arr == missing_value] = np.nan
324
+
325
+ # 执行压缩转换(_data_to_scale_offset会正确处理NaN和掩码)
326
+ new_data, fill_value = _data_to_scale_offset(arr_to_save, scale, offset, convert_dtype)
353
327
 
354
328
  # 创建变量并设置属性
355
- var = ncfile.createVariable(varname, nc_dtype, dims, zlib=compile_switch)
329
+ var = ncfile.createVariable(varname, nc_dtype, dims, zlib=compile_switch, fill_value=fill_value)
356
330
  var.scale_factor = scale
357
331
  var.add_offset = offset
358
- var._FillValue = fill_value # 明确设置填充值
359
332
  var[:] = new_data
360
333
  else:
334
+ # 非压缩情况,直接保存但要处理特殊值
361
335
  dtype = _numpy_to_nc_type(data.dtype)
336
+
337
+ clean_data = data.copy()
338
+
339
+ # 处理自定义缺失值(转换为NaN)
340
+ if missing_value is not None:
341
+ clean_data[data == missing_value] = np.nan
342
+
343
+ # 对于非浮点类型,若存在NaN或无穷值,则转换为浮点型以保留NaN(不做0替换)
344
+ if not np.issubdtype(data.dtype, np.floating):
345
+ finite_mask = np.isfinite(clean_data)
346
+ if not np.all(finite_mask):
347
+ clean_data = clean_data.astype(float) # 转换为浮点型保持NaN
348
+
349
+ # 处理掩码(统一转换为NaN)
350
+ if hasattr(data, "mask") and np.ma.is_masked(data):
351
+ clean_data[data.mask] = np.nan
352
+
353
+ # 创建变量
362
354
  var = ncfile.createVariable(varname, dtype, dims, zlib=False)
363
- # 确保不写入 NaN
364
- if np.issubdtype(data.dtype, np.floating) and np.any(~np.isfinite(data)):
365
- fill_val = missing_value if missing_value is not None else fill_value
366
- var._FillValue = fill_val
367
- clean_data = np.nan_to_num(data, nan=fill_val)
368
- var[:] = clean_data
369
- else:
370
- var[:] = data
371
- # 最后确保所有 NaN 值被处理
372
- _nan_to_fillvalue(file, fill_value)
355
+ var[:] = clean_data
356
+ # 压缩数据不再需要后处理(_nan_to_fillvalue 已移除),此分支仅作占位
357
+ if is_numeric and scale_offset_switch:
358
+ pass # 简化策略:不再需要后处理
373
359
  except Exception as e:
374
360
  raise RuntimeError(f"netCDF4 保存失败: {str(e)}") from e
375
361
 
376
362
 
377
363
 
364
+
378
365
  # 测试用例
379
366
  if __name__ == "__main__":
380
367
  # 示例文件路径,需根据实际情况修改
@@ -106,10 +106,10 @@ def replace_direct_content(source_file, target_dir, content_dict, key_value=Fals
106
106
  with open(target_file, "w") as f:
107
107
  f.write(content)
108
108
 
109
- print(f"[green]已将内容替换到新文件:{target_file}[/green]")
109
+ print(f"[green]Content replaced and saved to new file: {target_file}[/green]")
110
110
  return True
111
111
  except Exception as e:
112
- print(f"[red]替换内容时出错:{str(e)}[/red]")
112
+ print(f"[red]Error replacing content: {str(e)}[/red]")
113
113
  return False
114
114
 
115
115
 
oafuncs/oa_data.py CHANGED
@@ -1,11 +1,9 @@
1
- from typing import Any, List, Union, Literal
1
+ from typing import Any, List, Union
2
2
 
3
3
  import numpy as np
4
- import xarray as xr
5
- from rich import print
6
4
 
7
5
 
8
- __all__ = ["interp_along_dim", "interp_2d", "ensure_list", "mask_shapefile", "mask_land_ocean"]
6
+ __all__ = ["interp_along_dim", "interp_2d", "ensure_list"]
9
7
 
10
8
 
11
9
  def ensure_list(input_value: Any) -> List[str]:
@@ -146,125 +144,6 @@ def interp_2d(
146
144
  )
147
145
 
148
146
 
149
- def mask_shapefile(
150
- data_array: np.ndarray,
151
- longitudes: np.ndarray,
152
- latitudes: np.ndarray,
153
- shapefile_path: str,
154
- ) -> Union[xr.DataArray, None]:
155
- """
156
- Mask a 2D data array using a shapefile.
157
-
158
- Args:
159
- data_array (np.ndarray): 2D array of data to be masked.
160
- longitudes (np.ndarray): 1D array of longitudes.
161
- latitudes (np.ndarray): 1D array of latitudes.
162
- shapefile_path (str): Path to the shapefile used for masking.
163
-
164
- Returns:
165
- Union[xr.DataArray, None]: Masked xarray DataArray or None if an error occurs.
166
-
167
- Raises:
168
- FileNotFoundError: If the shapefile does not exist.
169
- ValueError: If the data dimensions do not match the coordinates.
170
-
171
- Examples:
172
- >>> data_array = np.random.rand(10, 10)
173
- >>> longitudes = np.linspace(-180, 180, 10)
174
- >>> latitudes = np.linspace(-90, 90, 10)
175
- >>> shapefile_path = "path/to/shapefile.shp"
176
- >>> masked_data = mask_shapefile(data_array, longitudes, latitudes, shapefile_path)
177
- >>> print(masked_data) # Expected output: Masked DataArray
178
-
179
- """
180
- import salem
181
- try:
182
- shp_f = salem.read_shapefile(shapefile_path)
183
- data_da = xr.DataArray(data_array, coords=[("latitude", latitudes), ("longitude", longitudes)])
184
- masked_data = data_da.salem.roi(shape=shp_f)
185
- return masked_data
186
- except Exception as e:
187
- print(f"[red]An error occurred: {e}[/red]")
188
- return None
189
-
190
-
191
-
192
- def _normalize_lon(lon: np.ndarray) -> np.ndarray:
193
- """将经度转换到 [-180, 180)。"""
194
- lon = np.asarray(lon, dtype=float)
195
- return np.where(lon >= 180, lon - 360, lon)
196
-
197
-
198
- def _land_sea_mask(
199
- lon: np.ndarray,
200
- lat: np.ndarray,
201
- keep: Literal["land", "ocean"],
202
- ) -> np.ndarray:
203
- """
204
- 根据 1-D 或 2-D 经纬度返回布尔掩膜。
205
- True 表示该位置 *保留*,False 表示该位置将被掩掉。
206
- """
207
- from global_land_mask import globe
208
-
209
- lon = _normalize_lon(lon)
210
- lat = np.asarray(lat, dtype=float)
211
-
212
- # 如果输入是 1-D,则网格化;2-D 则直接使用
213
- if lon.ndim == 1 and lat.ndim == 1:
214
- lon_2d, lat_2d = np.meshgrid(lon, lat)
215
- elif lon.ndim == 2 and lat.ndim == 2:
216
- lon_2d, lat_2d = lon, lat
217
- else:
218
- raise ValueError("经纬度必须是同维度的 1-D 或 2-D 数组")
219
-
220
- is_ocean = globe.is_ocean(lat_2d, lon_2d)
221
-
222
- if keep == "land":
223
- mask = ~is_ocean
224
- elif keep == "ocean":
225
- mask = is_ocean
226
- else:
227
- raise ValueError("keep 只能是 'land' 或 'ocean'")
228
-
229
- return mask
230
-
231
-
232
- def mask_land_ocean(
233
- data: xr.DataArray | xr.Dataset,
234
- lon: np.ndarray,
235
- lat: np.ndarray,
236
- *, # 强制关键字参数
237
- keep: Literal["land", "ocean"] = "land",
238
- ) -> xr.DataArray | xr.Dataset:
239
- """
240
- 根据海陆分布掩膜 xarray 对象。
241
-
242
- Parameters
243
- ----------
244
- data : xr.DataArray 或 xr.Dataset
245
- 至少包含 'lat' 和 'lon' 维度/坐标的数组。
246
- lon : array_like
247
- 经度,可以是 1-D 或 2-D。
248
- lat : array_like
249
- 纬度,可以是 1-D 或 2-D。
250
- keep : {'land', 'ocean'}, optional
251
- 指定要保留的部分,默认为 'land'。
252
-
253
- Returns
254
- -------
255
- 掩膜后的 xr.DataArray / xr.Dataset
256
- """
257
- mask = _land_sea_mask(lon, lat, keep)
258
-
259
- # 用 apply_ufunc 自动对齐并广播掩膜
260
- return xr.apply_ufunc(
261
- lambda x, m: x.where(m),
262
- data,
263
- xr.DataArray(mask, dims=("lat", "lon")),
264
- dask="parallelized",
265
- keep_attrs=True,
266
- )
267
-
268
147
 
269
148
  if __name__ == "__main__":
270
149
  pass
oafuncs/oa_file.py CHANGED
@@ -418,6 +418,9 @@ def replace_content(source_file: Union[str, os.PathLike], replacements: Dict[str
418
418
 
419
419
  if target_dir is None:
420
420
  target_dir = os.path.dirname(source_file)
421
+ # If source_file is just a filename without path, use current working directory
422
+ if not target_dir:
423
+ target_dir = os.getcwd()
421
424
  replace_direct_content(source_file, target_dir, replacements, key_value=use_key_value, new_name=new_filename)
422
425
 
423
426
 
oafuncs/oa_geo.py ADDED
@@ -0,0 +1,148 @@
1
+ from typing import Union, Literal
2
+
3
+ import numpy as np
4
+ import xarray as xr
5
+ from rich import print
6
+
7
+
8
+ __all__ = ["earth_distance", "mask_shapefile", "mask_land_ocean"]
9
+
10
+
11
+ def earth_distance(lon1, lat1, lon2, lat2):
12
+ """
13
+ 计算两点间的距离(km)
14
+ """
15
+ from math import asin, cos, radians, sin, sqrt
16
+ # 将经纬度转换为弧度
17
+ lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
18
+
19
+ # haversine公式
20
+ dlon = lon2 - lon1
21
+ dlat = lat2 - lat1
22
+ a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
23
+ c = 2 * asin(sqrt(a))
24
+ r = 6371 # 地球半径(公里)
25
+ return c * r
26
+
27
+
28
+ def mask_shapefile(
29
+ data_array: np.ndarray,
30
+ longitudes: np.ndarray,
31
+ latitudes: np.ndarray,
32
+ shapefile_path: str,
33
+ ) -> Union[xr.DataArray, None]:
34
+ """
35
+ Mask a 2D data array using a shapefile.
36
+
37
+ Args:
38
+ data_array (np.ndarray): 2D array of data to be masked.
39
+ longitudes (np.ndarray): 1D array of longitudes.
40
+ latitudes (np.ndarray): 1D array of latitudes.
41
+ shapefile_path (str): Path to the shapefile used for masking.
42
+
43
+ Returns:
44
+ Union[xr.DataArray, None]: Masked xarray DataArray or None if an error occurs.
45
+
46
+ Raises:
47
+ FileNotFoundError: If the shapefile does not exist.
48
+ ValueError: If the data dimensions do not match the coordinates.
49
+
50
+ Examples:
51
+ >>> data_array = np.random.rand(10, 10)
52
+ >>> longitudes = np.linspace(-180, 180, 10)
53
+ >>> latitudes = np.linspace(-90, 90, 10)
54
+ >>> shapefile_path = "path/to/shapefile.shp"
55
+ >>> masked_data = mask_shapefile(data_array, longitudes, latitudes, shapefile_path)
56
+ >>> print(masked_data) # Expected output: Masked DataArray
57
+
58
+ """
59
+ import salem
60
+ try:
61
+ shp_f = salem.read_shapefile(shapefile_path)
62
+ data_da = xr.DataArray(data_array, coords=[("latitude", latitudes), ("longitude", longitudes)])
63
+ masked_data = data_da.salem.roi(shape=shp_f)
64
+ return masked_data
65
+ except Exception as e:
66
+ print(f"[red]An error occurred: {e}[/red]")
67
+ return None
68
+
69
+
70
+
71
+ def _normalize_lon(lon: np.ndarray) -> np.ndarray:
72
+ """将经度转换到 [-180, 180)。"""
73
+ lon = np.asarray(lon, dtype=float)
74
+ return np.where(lon >= 180, lon - 360, lon)
75
+
76
+
77
+ def _land_sea_mask(
78
+ lon: np.ndarray,
79
+ lat: np.ndarray,
80
+ keep: Literal["land", "ocean"],
81
+ ) -> np.ndarray:
82
+ """
83
+ 根据 1-D 或 2-D 经纬度返回布尔掩膜。
84
+ True 表示该位置 *保留*,False 表示该位置将被掩掉。
85
+ """
86
+ from global_land_mask import globe
87
+
88
+ lon = _normalize_lon(lon)
89
+ lat = np.asarray(lat, dtype=float)
90
+
91
+ # 如果输入是 1-D,则网格化;2-D 则直接使用
92
+ if lon.ndim == 1 and lat.ndim == 1:
93
+ lon_2d, lat_2d = np.meshgrid(lon, lat)
94
+ elif lon.ndim == 2 and lat.ndim == 2:
95
+ lon_2d, lat_2d = lon, lat
96
+ else:
97
+ raise ValueError("经纬度必须是同维度的 1-D 或 2-D 数组")
98
+
99
+ is_ocean = globe.is_ocean(lat_2d, lon_2d)
100
+
101
+ if keep == "land":
102
+ mask = ~is_ocean
103
+ elif keep == "ocean":
104
+ mask = is_ocean
105
+ else:
106
+ raise ValueError("keep 只能是 'land' 或 'ocean'")
107
+
108
+ return mask
109
+
110
+
111
+ def mask_land_ocean(
112
+ data: xr.DataArray | xr.Dataset,
113
+ lon: np.ndarray,
114
+ lat: np.ndarray,
115
+ *, # 强制关键字参数
116
+ keep: Literal["land", "ocean"] = "land",
117
+ ) -> xr.DataArray | xr.Dataset:
118
+ """
119
+ 根据海陆分布掩膜 xarray 对象。
120
+
121
+ Parameters
122
+ ----------
123
+ data : xr.DataArray 或 xr.Dataset
124
+ 至少包含 'lat' 和 'lon' 维度/坐标的数组。
125
+ lon : array_like
126
+ 经度,可以是 1-D 或 2-D。
127
+ lat : array_like
128
+ 纬度,可以是 1-D 或 2-D。
129
+ keep : {'land', 'ocean'}, optional
130
+ 指定要保留的部分,默认为 'land'。
131
+
132
+ Returns
133
+ -------
134
+ 掩膜后的 xr.DataArray / xr.Dataset
135
+ """
136
+ mask = _land_sea_mask(lon, lat, keep)
137
+
138
+ # 用 apply_ufunc 自动对齐并广播掩膜
139
+ return xr.apply_ufunc(
140
+ lambda x, m: x.where(m),
141
+ data,
142
+ xr.DataArray(mask, dims=("lat", "lon")),
143
+ dask="parallelized",
144
+ keep_attrs=True,
145
+ )
146
+
147
+ if __name__ == "__main__":
148
+ pass
oafuncs/oa_nc.py CHANGED
@@ -15,10 +15,11 @@ def save(
15
15
  variable_name: Optional[str] = None,
16
16
  coordinates: Optional[dict] = None,
17
17
  write_mode: str = "w",
18
- convert_dtype: str = "int32",
18
+ convert_dtype: str = "int16",
19
19
  use_scale_offset: bool = True,
20
20
  use_compression: bool = True,
21
21
  preserve_mask_values: bool = True,
22
+ missing_value: Optional[Union[float, int]] = None,
22
23
  ) -> None:
23
24
  """
24
25
  Write data to a NetCDF file.
@@ -42,7 +43,7 @@ def save(
42
43
  """
43
44
  from ._script.netcdf_write import save_to_nc
44
45
 
45
- save_to_nc(file_path, data, variable_name, coordinates, write_mode, convert_dtype,use_scale_offset, use_compression, preserve_mask_values)
46
+ save_to_nc(file_path, data, variable_name, coordinates, write_mode, convert_dtype,use_scale_offset, use_compression, preserve_mask_values, missing_value)
46
47
  print(f"[green]Data successfully saved to {file_path}[/green]")
47
48
 
48
49
 
@@ -310,7 +311,7 @@ def draw(
310
311
  print("[red]No dataset or file provided.[/red]")
311
312
 
312
313
 
313
- def compress(src_path, dst_path=None,convert_dtype='int32'):
314
+ def compress(src_path, dst_path=None, convert_dtype='int16'):
314
315
  """
315
316
  压缩 NetCDF 文件,使用 scale_factor/add_offset 压缩数据。
316
317
  若 dst_path 省略,则自动生成新文件名,写出后删除原文件并将新文件改回原名。
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oafuncs
3
- Version: 0.0.98.43
3
+ Version: 0.0.98.44
4
4
  Summary: Oceanic and Atmospheric Functions
5
5
  Home-page: https://github.com/Industry-Pays/OAFuncs
6
6
  Author: Kun Liu
@@ -65,8 +65,6 @@ Just for the convenience of daily use, some complex operations are integrated in
65
65
  The code will be optimized and updated from time to time, with additions, deletions, or modifications…
66
66
 
67
67
  Existing functions will not be completely removed, they might just have a different function name, or the parameter passing might have been optimized…
68
-
69
- Note: If there are any requirements, you can email to liukun0312@stu.ouc.edu.cn. Within my capabilities, I can consider implementing them.
70
68
  ```
71
69
 
72
70
  ## PyPI
@@ -81,6 +79,11 @@ https://pypi.org/project/oafuncs
81
79
  https://github.com/Industry-Pays/OAFuncs
82
80
  ```
83
81
 
82
+ ## Download information
83
+ ```html
84
+ https://pypistats.org/packages/oafuncs
85
+ ```
86
+
84
87
  ## Example
85
88
 
86
89
  ```python
@@ -182,3 +185,6 @@ query()
182
185
  <img title="" src="./oafuncs/data_store/OAFuncs.png" alt="">
183
186
 
184
187
  <img title="OAFuncs" src="https://raw.githubusercontent.com/Industry-Pays/OAFuncs/main/oafuncs/_data/oafuncs.png" alt="OAFuncs">
188
+
189
+ ## Wiki
190
+ 更多内容,查看[wiki](https://opendeep.wiki/Industry-Pays/OAFuncs/introduction)
@@ -1,11 +1,12 @@
1
- oafuncs/__init__.py,sha256=T_-VtnWWllV3Q91twT5Yt2sUapeA051QbPNnBxmg9nw,1456
1
+ oafuncs/__init__.py,sha256=7630YgWbWGClu4Us1H2SAq-_eh9WzFHGxXkIXMcxRu0,1542
2
2
  oafuncs/oa_cmap.py,sha256=JwZMJ36uNwiCnzXqEtH2_PpeLtEaRaXP9YeGSl0PJSU,13886
3
- oafuncs/oa_data.py,sha256=KtUIFvuCogO4OQ-6SOVUeTwRXiP0C1NC4Lfs2W-ahJM,10259
3
+ oafuncs/oa_data.py,sha256=CG2YHY_R6MFrPw3UznT4T8BE8yXdgBMnmdUAEdh9GAo,6506
4
4
  oafuncs/oa_date.py,sha256=aU2wVIWXyWoRiSQ9dg8sHvShFTxw86RrgbV3Q6tDjD4,6841
5
5
  oafuncs/oa_draw.py,sha256=zal0Y3RPpN0TCGN4Gw9qLtjQdT6V0ZqpSUBFVOPL0x4,13952
6
- oafuncs/oa_file.py,sha256=836R5tZvCRs-TskodAYaIRLwRr9pvQBEQH8-SmruGVc,17144
6
+ oafuncs/oa_file.py,sha256=l9HTAK8iC1Bp_K7Mk3AX1UKuTFZZ-2yq5Hq71hnigbo,17299
7
+ oafuncs/oa_geo.py,sha256=BWkvV6nW_c-UKqbgaoy4U1YQYUMzAQOJlK--XppNIms,4371
7
8
  oafuncs/oa_help.py,sha256=0J5VaZX-cB0c090KxgmktQJBc0o00FsY-4wB8l5y00k,4178
8
- oafuncs/oa_nc.py,sha256=PeHWZ8D4BgFiEvJNKg6p46I3c5OM12sipMt7He3qb-o,16205
9
+ oafuncs/oa_nc.py,sha256=j501NlTuvrDIwNLXbMfE7nPPXdbbL7u9PGDj2l5AtnI,16277
9
10
  oafuncs/oa_python.py,sha256=xYMQnM0cGq9xUCtcoMpnN0LG5Rc_s94tai5nC6CNJ3E,4831
10
11
  oafuncs/oa_tool.py,sha256=VHx15VqpbzNlVXh0-3nJqcDgLVaECMD1FvxJ_CrV39E,8046
11
12
  oafuncs/_data/hycom.png,sha256=MadKs6Gyj5n9-TOu7L4atQfTXtF9dvN9w-tdU9IfygI,10945710
@@ -15,11 +16,11 @@ oafuncs/_script/data_interp.py,sha256=gr1coA2N1mxzS4iv6S0C4lZpEQbuuHHNW-08RrhgPA
15
16
  oafuncs/_script/email.py,sha256=l5xDgdVj8O5V0J2SwjsHKdUuxOH2jZvwdMO_P0dImHU,2684
16
17
  oafuncs/_script/netcdf_merge.py,sha256=tM9ePqLiEsE7eIsNM5XjEYeXwxjYOdNz5ejnEuI7xKw,6066
17
18
  oafuncs/_script/netcdf_modify.py,sha256=XDlAEToe_lwfAetkBSENqU5df-wnH7MGuxNTjG1gwHY,4178
18
- oafuncs/_script/netcdf_write.py,sha256=CdehIHnWqGfmbPznOW0lYFTyEOHC5UrW4ly2stw86bQ,17745
19
+ oafuncs/_script/netcdf_write.py,sha256=EDNycnhlrW1c6zcpmpObQeszDRX_lRxjTL-j0G4HqjI,17420
19
20
  oafuncs/_script/parallel.py,sha256=VMNhK3PNcZrIj-ZxcmAWuU3mIfVsfztsk2Ceqwri4e4,10069
20
21
  oafuncs/_script/parallel_bak.py,sha256=2ySmYZ9e_PLhhMocWCCFWCYZD3Gs_mxl0HxEzbIuQvA,8861
21
22
  oafuncs/_script/plot_dataset.py,sha256=3BPQnx1jBeH-xl8u-j5A93nYevLuD4v3pGGGP7WiB20,16534
22
- oafuncs/_script/replace_file_content.py,sha256=wIwvaISFNYWG58BLZHZP9ZgbC5OhoZ-cpR3y25U1EUM,5601
23
+ oafuncs/_script/replace_file_content.py,sha256=MGsfNnTs6wRrHINygroRZNjDXQ4_Zhj9ebnxYP-hazY,5609
23
24
  oafuncs/oa_down/User_Agent-list.txt,sha256=pHaMlElMvZ8TG4vf4BqkZYKqe0JIGkr4kCN0lM1Y9FQ,514295
24
25
  oafuncs/oa_down/__init__.py,sha256=IT6oTqaxuV_mC6AwBut0HtkmnVtEu1MyX0x0oS7TKoA,218
25
26
  oafuncs/oa_down/hycom_3hourly.py,sha256=dFXSC_5o-Dic616KrLXir4tEHvCiZt8vGKPEYpXFMmA,57356
@@ -37,8 +38,8 @@ oafuncs/oa_sign/__init__.py,sha256=JSx1fcWpmNhQBvX_Bmq3xysfSkkFMrjbJASxV_V6aqE,1
37
38
  oafuncs/oa_sign/meteorological.py,sha256=3MSjy7HTcvz2zsITkjUMr_0Y027Gas1LFE9pk99990k,6110
38
39
  oafuncs/oa_sign/ocean.py,sha256=3uYEzaq-27yVy23IQoqy-clhWu1I_fhPFBAQyT-OF4M,5562
39
40
  oafuncs/oa_sign/scientific.py,sha256=moIl2MEY4uitbXoD596JmXookXGQtQsS-8_1NBBTx84,4689
40
- oafuncs-0.0.98.43.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
41
- oafuncs-0.0.98.43.dist-info/METADATA,sha256=jzkc_JyBqwIsEsL8ljXcG2LbCnCNtCzYsKxyfYLA_xQ,4347
42
- oafuncs-0.0.98.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- oafuncs-0.0.98.43.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
44
- oafuncs-0.0.98.43.dist-info/RECORD,,
41
+ oafuncs-0.0.98.44.dist-info/licenses/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
42
+ oafuncs-0.0.98.44.dist-info/METADATA,sha256=yWxBsUGF1rlJBn42pXZyCUrgqhXWpyqc-l_CTyBEnSk,4384
43
+ oafuncs-0.0.98.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
+ oafuncs-0.0.98.44.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
45
+ oafuncs-0.0.98.44.dist-info/RECORD,,