oafuncs 0.0.98.2__py3-none-any.whl → 0.0.98.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,15 @@
1
1
  import os
2
+
2
3
  import netCDF4 as nc
3
4
  import numpy as np
4
5
  import xarray as xr
6
+ import warnings
7
+
8
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
5
9
 
6
10
 
7
11
  def _numpy_to_nc_type(numpy_type):
8
- """将NumPy数据类型映射到NetCDF数据类型"""
12
+ """将 NumPy 数据类型映射到 NetCDF 数据类型"""
9
13
  numpy_to_nc = {
10
14
  "float32": "f4",
11
15
  "float64": "f8",
@@ -23,6 +27,12 @@ def _numpy_to_nc_type(numpy_type):
23
27
 
24
28
 
25
29
  def _calculate_scale_and_offset(data, n=16):
30
+ """
31
+ 计算数值型数据的 scale_factor 与 add_offset,
32
+ 将数据映射到 [0, 2**n - 1] 的范围。
33
+
34
+ 要求 data 为数值型的 NumPy 数组,不允许全 NaN 值。
35
+ """
26
36
  if not isinstance(data, np.ndarray):
27
37
  raise ValueError("Input data must be a NumPy array.")
28
38
 
@@ -32,85 +42,162 @@ def _calculate_scale_and_offset(data, n=16):
32
42
  if np.isnan(data_min) or np.isnan(data_max):
33
43
  raise ValueError("Input data contains NaN values.")
34
44
 
35
- scale_factor = (data_max - data_min) / (2**n - 1)
36
- add_offset = data_min + 2 ** (n - 1) * scale_factor
45
+ if data_max == data_min:
46
+ scale_factor = 1.0
47
+ add_offset = data_min
48
+ else:
49
+ scale_factor = (data_max - data_min) / (2**n - 1)
50
+ add_offset = data_min + 2 ** (n - 1) * scale_factor
37
51
  return scale_factor, add_offset
38
52
 
39
53
 
54
+ def _data_to_scale_offset(data, scale, offset):
55
+ """
56
+ 将数据转换为 scale_factor 和 add_offset 的形式。
57
+ 此处同时替换 NaN、正无穷和负无穷为填充值 -32767,
58
+ 以确保转换后的数据可安全转为 int16。
59
+ """
60
+ if not isinstance(data, np.ndarray):
61
+ raise ValueError("Input data must be a NumPy array.")
62
+
63
+ # 先计算转换后的数据
64
+ result = np.around((data - offset) / scale)
65
+ # 替换 NaN, 正负无穷(posinf, neginf)为 -32767
66
+ result = np.nan_to_num(result, nan=-32767, posinf=-32767, neginf=-32767)
67
+ result = np.clip(result, -32767, 32767) # 限制范围在 int16 的有效范围内
68
+ result = np.where(np.isfinite(result), result, -32767) # 替换无效值为 -32767
69
+ new_data = result.astype(np.int16)
70
+ return new_data
71
+
72
+
40
73
  def save_to_nc(file, data, varname=None, coords=None, mode="w", scale_offset_switch=True, compile_switch=True):
41
- # 处理xarray对象的情况(当varname和coords都为None时)
42
- if varname is None and coords is None:
43
- if not isinstance(data, (xr.DataArray, xr.Dataset)):
44
- raise ValueError("When varname and coords are not provided, data must be an xarray object")
74
+ """
75
+ 保存数据到 NetCDF 文件,支持 xarray 对象(DataArray Dataset)和 numpy 数组。
76
+
77
+ 仅对数据变量中数值型数据进行压缩转换(利用 scale_factor/add_offset 转换后转为 int16),
78
+ 非数值型数据以及所有坐标变量将禁用任何压缩,直接保存原始数据。
79
+
80
+ 参数:
81
+ - file: 保存文件的路径
82
+ - data: xarray.DataArray、xarray.Dataset 或 numpy 数组
83
+ - varname: 变量名(仅适用于传入 numpy 数组或 DataArray 时)
84
+ - coords: 坐标字典(numpy 数组分支时使用),所有坐标变量均不压缩
85
+ - mode: "w"(覆盖)或 "a"(追加)
86
+ - scale_offset_switch: 是否对数值型数据变量进行压缩转换
87
+ - compile_switch: 是否启用 NetCDF4 的 zlib 压缩(仅针对数值型数据有效)
88
+ """
89
+ # 处理 xarray 对象(DataArray 或 Dataset)的情况
90
+ if isinstance(data, (xr.DataArray, xr.Dataset)):
91
+ encoding = {} # 用于保存数据变量的编码信息
45
92
 
46
- encoding = {}
47
93
  if isinstance(data, xr.DataArray):
48
94
  if data.name is None:
49
95
  data = data.rename("data")
50
- varname = data.name
51
- encoding[varname] = {"zlib": compile_switch, "complevel": 4}
52
- if scale_offset_switch:
96
+ varname = data.name if varname is None else varname
97
+ # 判断数据是否为数值型
98
+ if np.issubdtype(data.values.dtype, np.number) and scale_offset_switch:
53
99
  scale, offset = _calculate_scale_and_offset(data.values)
54
- encoding[varname].update({"dtype": "int16", "scale_factor": scale, "add_offset": offset, "_FillValue": -32767})
100
+ new_values = _data_to_scale_offset(data.values, scale, offset)
101
+ # 生成新 DataArray,保留原坐标和属性,同时写入转换参数到属性中
102
+ new_da = data.copy(data=new_values)
103
+ new_da.attrs["scale_factor"] = float(scale)
104
+ new_da.attrs["add_offset"] = float(offset)
105
+ encoding[varname] = {
106
+ "zlib": compile_switch,
107
+ "complevel": 4,
108
+ "dtype": "int16",
109
+ "_FillValue": -32767,
110
+ }
111
+ new_da.to_dataset(name=varname).to_netcdf(file, mode=mode, encoding=encoding)
55
112
  else:
56
- encoding[varname].update({"dtype": "float32", "_FillValue": np.nan})
57
- else: # Dataset情况
113
+ data.to_dataset(name=varname).to_netcdf(file, mode=mode)
114
+ return
115
+
116
+ else:
117
+ # 处理 Dataset 的情况,仅处理 data_vars 数据变量,坐标变量保持原样
118
+ new_vars = {}
119
+ encoding = {}
58
120
  for var in data.data_vars:
59
- encoding[var] = {"zlib": compile_switch, "complevel": 4}
60
- if scale_offset_switch:
61
- scale, offset = _calculate_scale_and_offset(data[var].values)
62
- encoding[var].update({"dtype": "int16", "scale_factor": scale, "add_offset": offset, "_FillValue": -32767})
121
+ da = data[var]
122
+ if np.issubdtype(np.asarray(da.values).dtype, np.number) and scale_offset_switch:
123
+ scale, offset = _calculate_scale_and_offset(da.values)
124
+ new_values = _data_to_scale_offset(da.values, scale, offset)
125
+ new_da = xr.DataArray(new_values, dims=da.dims, coords=da.coords, attrs=da.attrs)
126
+ new_da.attrs["scale_factor"] = float(scale)
127
+ new_da.attrs["add_offset"] = float(offset)
128
+ new_vars[var] = new_da
129
+ encoding[var] = {
130
+ "zlib": compile_switch,
131
+ "complevel": 4,
132
+ "dtype": "int16",
133
+ "_FillValue": -32767,
134
+ }
63
135
  else:
64
- encoding[var].update({"dtype": "float32", "_FillValue": np.nan})
65
-
66
- try:
67
- data.to_netcdf(file, mode=mode, encoding=encoding)
68
- return
69
- except Exception as e:
70
- raise RuntimeError(f"Failed to save xarray object: {str(e)}") from e
136
+ new_vars[var] = da
137
+ new_ds = xr.Dataset(new_vars, coords=data.coords)
138
+ if encoding:
139
+ new_ds.to_netcdf(file, mode=mode, encoding=encoding)
140
+ else:
141
+ new_ds.to_netcdf(file, mode=mode)
142
+ return
71
143
 
72
- # 处理普通numpy数组的情况
144
+ # 处理纯 numpy 数组情况
73
145
  if mode == "w" and os.path.exists(file):
74
146
  os.remove(file)
75
147
  elif mode == "a" and not os.path.exists(file):
76
148
  mode = "w"
77
-
149
+ data = np.asarray(data)
150
+ is_numeric = np.issubdtype(data.dtype, np.number)
78
151
  try:
79
152
  with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
80
- # 创建维度并写入坐标
153
+ # 坐标变量直接写入,不做压缩
81
154
  if coords is not None:
82
155
  for dim, values in coords.items():
83
156
  if dim not in ncfile.dimensions:
84
157
  ncfile.createDimension(dim, len(values))
85
- var = ncfile.createVariable(dim, _numpy_to_nc_type(values.dtype), (dim,))
86
- var[:] = values
158
+ var_obj = ncfile.createVariable(dim, _numpy_to_nc_type(np.asarray(values).dtype), (dim,))
159
+ var_obj[:] = values
87
160
 
88
- # 创建变量
89
161
  dims = list(coords.keys()) if coords else []
90
- if scale_offset_switch:
162
+ if is_numeric and scale_offset_switch:
91
163
  scale, offset = _calculate_scale_and_offset(data)
164
+ new_data = _data_to_scale_offset(data, scale, offset)
92
165
  var = ncfile.createVariable(varname, "i2", dims, fill_value=-32767, zlib=compile_switch)
93
166
  var.scale_factor = scale
94
167
  var.add_offset = offset
168
+ # Ensure no invalid values in new_data before assignment
169
+ var[:] = new_data
95
170
  else:
171
+ # 非数值型数据,禁止压缩
96
172
  dtype = _numpy_to_nc_type(data.dtype)
97
- var = ncfile.createVariable(varname, dtype, dims, zlib=compile_switch)
98
-
99
- var[:] = data
173
+ var = ncfile.createVariable(varname, dtype, dims, zlib=False)
174
+ var[:] = data
100
175
  except Exception as e:
101
- raise RuntimeError(f"Failed to save netCDF4 file: {str(e)}") from e
176
+ raise RuntimeError(f"netCDF4 保存失败: {str(e)}") from e
102
177
 
103
178
 
179
+ # 测试用例
104
180
  if __name__ == "__main__":
105
- # Example usage
106
- data = xr.open_dataset(r"F:\roms_rst.nc")["u"]
107
- save_to_nc(r"F:\test.nc", data)
108
-
109
- # xarray测试
110
- data = xr.DataArray(np.random.rand(10, 20), dims=("x", "y"), name="temperature")
111
- save_to_nc(r"F:\test_xarray.nc", data)
112
-
113
- # numpy测试
114
- arr = np.random.rand(5, 3)
115
- coords = {"x": np.arange(5), "y": np.arange(3)}
116
- save_to_nc(r"F:\test_numpy.nc", arr, varname="data", coords=coords)
181
+ # --------------------------------
182
+ # dataset
183
+ file = r"F:\roms_rst.nc"
184
+ ds = xr.open_dataset(file)
185
+ outfile = r"F:\roms_rst_test.nc"
186
+ save_to_nc(outfile, ds)
187
+ ds.close()
188
+ # --------------------------------
189
+ # dataarray
190
+ data = np.random.rand(4, 3, 2)
191
+ coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
192
+ varname = "test_var"
193
+ data = xr.DataArray(data, dims=("x", "y", "z"), coords=coords, name=varname)
194
+ outfile = r"F:\test_dataarray.nc"
195
+ save_to_nc(outfile, data)
196
+ # --------------------------------
197
+ # numpy array
198
+ data = np.random.rand(4, 3, 2)
199
+ coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
200
+ varname = "test_var"
201
+ outfile = r"F:\test_numpy.nc"
202
+ save_to_nc(outfile, data, varname=varname, coords=coords)
203
+ # --------------------------------