oafuncs 0.0.98.30__tar.gz → 0.0.98.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {oafuncs-0.0.98.30/oafuncs.egg-info → oafuncs-0.0.98.55}/PKG-INFO +10 -3
  2. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/README.md +8 -2
  3. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/__init__.py +5 -1
  4. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/cprogressbar.py +5 -2
  5. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/data_interp.py +0 -15
  6. oafuncs-0.0.98.55/oafuncs/_script/email.py +48 -0
  7. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/netcdf_modify.py +0 -15
  8. oafuncs-0.0.98.55/oafuncs/_script/netcdf_write.py +385 -0
  9. oafuncs-0.0.98.55/oafuncs/_script/parallel.py +269 -0
  10. oafuncs-0.0.98.30/oafuncs/_script/parallel.py → oafuncs-0.0.98.55/oafuncs/_script/parallel_bak.py +1 -1
  11. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/plot_dataset.py +9 -4
  12. oafuncs-0.0.98.55/oafuncs/_script/process_roms.py +620 -0
  13. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/replace_file_content.py +2 -17
  14. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_cmap.py +27 -2
  15. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_data.py +3 -45
  16. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_date.py +58 -54
  17. oafuncs-0.0.98.55/oafuncs/oa_down/__init__.py +8 -0
  18. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/hycom_3hourly.py +30 -7
  19. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/idm.py +0 -17
  20. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/literature.py +141 -55
  21. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/read_proxy.py +0 -15
  22. oafuncs-0.0.98.55/oafuncs/oa_down/user_agent.py +17 -0
  23. oafuncs-0.0.98.55/oafuncs/oa_draw.py +461 -0
  24. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_file.py +44 -21
  25. oafuncs-0.0.98.55/oafuncs/oa_geo.py +198 -0
  26. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_help.py +0 -15
  27. oafuncs-0.0.98.55/oafuncs/oa_linux.py +193 -0
  28. oafuncs-0.0.98.55/oafuncs/oa_model/roms.py +42 -0
  29. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_nc.py +38 -10
  30. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_python.py +0 -15
  31. oafuncs-0.0.98.55/oafuncs/oa_sign/__init__.py +6 -0
  32. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_sign/meteorological.py +0 -16
  33. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_sign/ocean.py +0 -16
  34. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_sign/scientific.py +0 -16
  35. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_tool.py +5 -5
  36. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55/oafuncs.egg-info}/PKG-INFO +10 -3
  37. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/SOURCES.txt +5 -4
  38. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/requires.txt +1 -0
  39. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/setup.py +9 -4
  40. oafuncs-0.0.98.30/oafuncs/_script/email.py +0 -116
  41. oafuncs-0.0.98.30/oafuncs/_script/netcdf_write.py +0 -467
  42. oafuncs-0.0.98.30/oafuncs/_script/parallel_test.py +0 -14
  43. oafuncs-0.0.98.30/oafuncs/oa_down/__init__.py +0 -22
  44. oafuncs-0.0.98.30/oafuncs/oa_down/hycom_3hourly_proxy.py +0 -1230
  45. oafuncs-0.0.98.30/oafuncs/oa_down/user_agent.py +0 -31
  46. oafuncs-0.0.98.30/oafuncs/oa_draw.py +0 -401
  47. oafuncs-0.0.98.30/oafuncs/oa_model/roms/__init__.py +0 -20
  48. oafuncs-0.0.98.30/oafuncs/oa_model/roms/test.py +0 -19
  49. oafuncs-0.0.98.30/oafuncs/oa_sign/__init__.py +0 -21
  50. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/LICENSE.txt +0 -0
  51. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/MANIFEST.in +0 -0
  52. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_data/hycom.png +0 -0
  53. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_data/oafuncs.png +0 -0
  54. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/netcdf_merge.py +0 -0
  55. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/User_Agent-list.txt +0 -0
  56. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/test_ua.py +0 -0
  57. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_model/__init__.py +0 -0
  58. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_model/wrf/__init__.py +0 -0
  59. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_model/wrf/little_r.py +0 -0
  60. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/dependency_links.txt +0 -0
  61. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/top_level.txt +0 -0
  62. {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oafuncs
3
- Version: 0.0.98.30
3
+ Version: 0.0.98.55
4
4
  Summary: Oceanic and Atmospheric Functions
5
5
  Home-page: https://github.com/Industry-Pays/OAFuncs
6
6
  Author: Kun Liu
@@ -24,6 +24,7 @@ Requires-Dist: pandas
24
24
  Requires-Dist: xarray
25
25
  Requires-Dist: rich
26
26
  Requires-Dist: pathlib
27
+ Requires-Dist: lxml
27
28
  Requires-Dist: requests
28
29
  Requires-Dist: bs4
29
30
  Requires-Dist: httpx
@@ -64,8 +65,6 @@ Just for the convenience of daily use, some complex operations are integrated in
64
65
  The code will be optimized and updated from time to time, with additions, deletions, or modifications…
65
66
 
66
67
  Existing functions will not be completely removed, they might just have a different function name, or the parameter passing might have been optimized…
67
-
68
- Note: If there are any requirements, you can email to liukun0312@stu.ouc.edu.cn. Within my capabilities, I can consider implementing them.
69
68
  ```
70
69
 
71
70
  ## PyPI
@@ -80,6 +79,11 @@ https://pypi.org/project/oafuncs
80
79
  https://github.com/Industry-Pays/OAFuncs
81
80
  ```
82
81
 
82
+ ## Download information
83
+ ```html
84
+ https://pypistats.org/packages/oafuncs
85
+ ```
86
+
83
87
  ## Example
84
88
 
85
89
  ```python
@@ -181,3 +185,6 @@ query()
181
185
  <img title="" src="./oafuncs/data_store/OAFuncs.png" alt="">
182
186
 
183
187
  <img title="OAFuncs" src="https://raw.githubusercontent.com/Industry-Pays/OAFuncs/main/oafuncs/_data/oafuncs.png" alt="OAFuncs">
188
+
189
+ ## Wiki
190
+ 更多内容,查看[wiki_old](https://opendeep.wiki/Industry-Pays/OAFuncs/introduction) or [wiki_new](https://deepwiki.com/Industry-Pays/OAFuncs)
@@ -12,8 +12,6 @@ Just for the convenience of daily use, some complex operations are integrated in
12
12
  The code will be optimized and updated from time to time, with additions, deletions, or modifications…
13
13
 
14
14
  Existing functions will not be completely removed, they might just have a different function name, or the parameter passing might have been optimized…
15
-
16
- Note: If there are any requirements, you can email to liukun0312@stu.ouc.edu.cn. Within my capabilities, I can consider implementing them.
17
15
  ```
18
16
 
19
17
  ## PyPI
@@ -28,6 +26,11 @@ https://pypi.org/project/oafuncs
28
26
  https://github.com/Industry-Pays/OAFuncs
29
27
  ```
30
28
 
29
+ ## Download information
30
+ ```html
31
+ https://pypistats.org/packages/oafuncs
32
+ ```
33
+
31
34
  ## Example
32
35
 
33
36
  ```python
@@ -129,3 +132,6 @@ query()
129
132
  <img title="" src="./oafuncs/data_store/OAFuncs.png" alt="">
130
133
 
131
134
  <img title="OAFuncs" src="https://raw.githubusercontent.com/Industry-Pays/OAFuncs/main/oafuncs/_data/oafuncs.png" alt="OAFuncs">
135
+
136
+ ## Wiki
137
+ 更多内容,查看[wiki_old](https://opendeep.wiki/Industry-Pays/OAFuncs/introduction) or [wiki_new](https://deepwiki.com/Industry-Pays/OAFuncs)
@@ -40,4 +40,8 @@ from .oa_tool import *
40
40
  # from ._script import *
41
41
  # ------------------- 2025-03-16 15:56:01 -------------------
42
42
  from .oa_date import *
43
- # ------------------- 2025-03-27 16:56:57 -------------------
43
+ # ------------------- 2025-03-27 16:56:57 -------------------
44
+ from .oa_geo import *
45
+ # ------------------- 2025-09-04 14:08:26 -------------------
46
+ from .oa_linux import *
47
+ # ------------------- 2025-09-14 12:30:00 -------------------
@@ -189,7 +189,8 @@ class ColorProgressBar:
189
189
  self._is_jupyter = "ipykernel" in sys.modules
190
190
 
191
191
  # 输出样式
192
- filled_list = ["▊", "█", "▓", "▒", "░", "#", "=", ">", "▌", "▍", "▎", "▏", "*"]
192
+ # filled_list = ["▊", "█", "▓", "▒", "░", "#", "=", ">", "▌", "▍", "▎", "▏", "*"]
193
+ filled_list = ["█", "▓", "▒", "░", "#", "=", ">", "*"]
193
194
  self.filled = random.choice(filled_list)
194
195
 
195
196
  def _generate_gradient(self) -> Optional[List[str]]:
@@ -300,11 +301,13 @@ class ColorProgressBar:
300
301
  # 获取终端宽度
301
302
  try:
302
303
  term_width = self.bar_length or (shutil.get_terminal_size().columns if self._is_terminal else 80)
304
+ # print(f'Terminal width: {term_width}') # 调试输出
303
305
  except (AttributeError, OSError):
304
306
  term_width = 80 # 默认终端宽度
305
307
 
306
308
  # 确保有效宽度不小于最低限制
307
- effective_width = max(15, term_width - 40)
309
+ # effective_width = max(15, term_width - 40)
310
+ effective_width = max(15, int(term_width * 0.6)) # 保留40个字符用于其他信息
308
311
  if effective_width < 10:
309
312
  warnings.warn("Terminal width is too small for proper progress bar rendering.")
310
313
  effective_width = 10 # 设置最低宽度限制
@@ -1,18 +1,3 @@
1
- #!/usr/bin/env python
2
- # coding=utf-8
3
- """
4
- Author: Liu Kun && 16031215@qq.com
5
- Date: 2025-04-25 16:22:52
6
- LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2025-04-26 19:21:31
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\data_interp.py
9
- Description:
10
- EditPlatform: vscode
11
- ComputerInfo: XPS 15 9510
12
- SystemInfo: Windows 11
13
- Python Version: 3.12
14
- """
15
-
16
1
  from typing import List, Union
17
2
 
18
3
  import numpy as np
@@ -0,0 +1,48 @@
1
+ from rich import print
2
+
3
+
4
+
5
def _send_message(msg_from, password, msg_to, title, content):
    """Send a plain-text e-mail through the QQ Mail SMTP-over-SSL endpoint.

    Args:
        msg_from: Sender address; also used as the SMTP login name.
        password: Secret handed to ``SMTP_SSL.login`` for ``msg_from``.
        msg_to: Recipient address, or a list/tuple of recipient addresses.
        title: Subject line (encoded as UTF-8).
        content: Message body, sent as UTF-8 ``text/plain``.

    Raises:
        smtplib.SMTPException: If login or delivery is rejected by the server.
        OSError: If the connection to the server cannot be established.
    """
    from email.header import Header
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    import smtplib

    # Build the message before opening the connection so that a malformed
    # message never leaves a socket behind.
    msg = MIMEMultipart()
    msg["Subject"] = Header(title, "utf-8").encode()
    msg["From"] = msg_from
    # A header value must be a string; several recipients are comma-joined.
    msg["To"] = ", ".join(msg_to) if isinstance(msg_to, (list, tuple)) else msg_to
    msg.attach(MIMEText(content, "plain", "utf-8"))

    # The context manager issues QUIT and closes the socket even when
    # login() or sendmail() raises.
    with smtplib.SMTP_SSL("smtp.qq.com", 465) as con:
        con.login(msg_from, password)
        con.sendmail(msg_from, msg_to, msg.as_string())

    print(f"已通过{msg_from}成功向{msg_to}发送邮件!")
    print("发送内容为:\n{}\n\n".format(content))
42
+
43
+
44
+
45
+
46
+
47
+ if __name__ == "__main__":
48
+ pass
@@ -1,18 +1,3 @@
1
- #!/usr/bin/env python
2
- # coding=utf-8
3
- """
4
- Author: Liu Kun && 16031215@qq.com
5
- Date: 2025-04-05 14:00:50
6
- LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2025-04-05 14:00:50
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\netcdf_modify.py
9
- Description:
10
- EditPlatform: vscode
11
- ComputerInfo: XPS 15 9510
12
- SystemInfo: Windows 11
13
- Python Version: 3.12
14
- """
15
-
16
1
  import os
17
2
 
18
3
  import netCDF4 as nc
@@ -0,0 +1,385 @@
1
+ import os
2
+ import warnings
3
+ import numpy as np
4
+ import xarray as xr
5
+ import netCDF4 as nc
6
+
7
+ warnings.filterwarnings("ignore", category=RuntimeWarning)
8
+
9
+ def _get_dtype_info(dtype):
10
+ """
11
+ 根据输入的 dtype 返回其 numpy_type, clip_min, clip_max。
12
+ 支持 int8, int16, int32, int64 四种整数类型。
13
+ 简化处理:不使用fill_value,所有特殊值统一为NaN。
14
+ 使用完整的数据类型范围,不预留填充值空间。
15
+ """
16
+ dtype_map = {
17
+ "int8": (np.int8, np.iinfo(np.int8).min, np.iinfo(np.int8).max),
18
+ "int16": (np.int16, np.iinfo(np.int16).min, np.iinfo(np.int16).max),
19
+ "int32": (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max),
20
+ "int64": (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max),
21
+ }
22
+ if dtype not in dtype_map:
23
+ raise ValueError(f"Unsupported dtype: {dtype}. Supported types are 'int8', 'int16', 'int32', and 'int64'.")
24
+
25
+ return dtype_map[dtype]
26
+
27
+
28
+ def _numpy_to_nc_type(numpy_type):
29
+ """将 NumPy 数据类型映射到 NetCDF 数据类型"""
30
+ numpy_to_nc = {
31
+ "float32": "f4",
32
+ "float64": "f8",
33
+ "int8": "i1",
34
+ "int16": "i2",
35
+ "int32": "i4",
36
+ "int64": "i8",
37
+ "uint8": "u1",
38
+ "uint16": "u2",
39
+ "uint32": "u4",
40
+ "uint64": "u8",
41
+ }
42
+ numpy_type_str = str(numpy_type) if not isinstance(numpy_type, str) else numpy_type
43
+ return numpy_to_nc.get(numpy_type_str, "f4")
44
+
45
+
46
def _calculate_scale_and_offset(data, dtype="int32"):
    """Derive the ``scale_factor`` and ``add_offset`` for packing *data* into integers.

    Only valid samples (finite and not masked) take part. The offset is the
    midpoint of the valid range, and the scale maps the largest deviation from
    that midpoint onto the target integer range while leaving the type's
    minimum value free to serve as a fill value.

    Args:
        data: ``numpy.ndarray`` (masked arrays are accepted).
        dtype: Target integer type name understood by ``_get_dtype_info``.

    Returns:
        ``(scale_factor, add_offset)`` as Python floats. ``(1.0, 0.0)`` when
        there is no valid sample, ``(1.0, value)`` when every valid sample
        equals ``value``.

    Raises:
        ValueError: If *data* is not a NumPy array, or *dtype* is rejected by
            ``_get_dtype_info``.
    """
    if not isinstance(data, np.ndarray):
        raise ValueError("Input data must be a NumPy array.")

    _, clip_min, clip_max = _get_dtype_info(dtype)

    # Valid samples: finite values that are not hidden behind a mask.
    raw = np.ma.getdata(data)
    valid_mask = np.isfinite(raw) & ~np.ma.getmaskarray(data)
    if not valid_mask.any():
        return 1.0, 0.0

    valid = raw[valid_mask]
    # Convert to Python floats first: adding two NumPy scalars of a narrow
    # integer type (e.g. int8 100 + 120) would wrap around.
    data_min = float(valid.min())
    data_max = float(valid.max())

    # A constant field needs no scaling; avoid a zero scale factor.
    if data_max == data_min:
        return 1.0, data_min

    # Midpoint of the range; halving before adding cannot overflow.
    add_offset = data_min / 2.0 + data_max / 2.0
    max_deviation = max(data_max - add_offset, add_offset - data_min)

    # Usable integer half-range (the minimum value is kept for the fill value).
    available_range = min(abs(clip_min + 1), abs(clip_max))
    scale_factor = max_deviation / available_range

    # A vanishingly small range can underflow to 0, which would later divide
    # by zero when packing.
    if not scale_factor > 0.0:
        scale_factor = 1.0

    return scale_factor, add_offset
85
+
86
+
87
def _data_to_scale_offset(data, scale, offset, dtype="int32"):
    """Pack *data* into an integer array using ``scale`` and ``offset``.

    Packing formula: ``packed = round((value - offset) / scale)``.
    NaN, infinite and masked entries are written as the fill value, which is
    the minimum of the target integer type; valid entries are limited to
    ``[clip_min + 1, clip_max]`` so they can never collide with it.

    Args:
        data: ``numpy.ndarray`` (masked arrays are accepted).
        scale: Non-zero, finite scale factor.
        offset: Offset subtracted before scaling.
        dtype: Target integer type name understood by ``_get_dtype_info``.

    Returns:
        ``(packed_array, fill_value)``.

    Raises:
        ValueError: If *data* is not a NumPy array, or *scale* is zero or
            not finite.
    """
    if not isinstance(data, np.ndarray):
        raise ValueError("Input data must be a NumPy array.")
    if scale == 0 or not np.isfinite(scale):
        raise ValueError("scale must be a non-zero finite number.")

    np_dtype, clip_min, clip_max = _get_dtype_info(dtype)
    fill_value = clip_min  # the type's minimum marks invalid data

    # Float clipping bounds. For int64 the limits are not exactly
    # representable as float64, so step one float towards zero when the
    # rounded bound would fall outside the allowed integer interval.
    lower, upper = float(clip_min + 1), float(clip_max)
    if lower < clip_min + 1:
        lower = float(np.nextafter(lower, 0.0))
    if upper > clip_max:
        upper = float(np.nextafter(upper, 0.0))

    # Start from an all-fill array and overwrite the valid cells only.
    result = np.full(data.shape, fill_value, dtype=np_dtype)

    raw = np.ma.getdata(data)
    valid_mask = np.isfinite(raw) & ~np.ma.getmaskarray(data)

    if valid_mask.any():
        scaled = np.round((raw[valid_mask].astype(np.float64) - offset) / scale)
        # Clip while still in floating point: casting first would let
        # out-of-range values wrap around before they could be clipped.
        result[valid_mask] = np.clip(scaled, lower, upper).astype(np_dtype)

    return result, fill_value
121
+
122
+
123
def _pack_for_netcdf(arr, missing_val, convert_dtype, extra_invalid=None):
    """Quantize a numeric array with the module's scale/offset helpers.

    Returns ``None`` when *arr* holds no valid sample, otherwise
    ``(packed, scale, offset, fill_value)``.
    """
    if arr.dtype.kind in "fiu":
        valid = np.isfinite(arr)
    else:
        valid = np.ones(arr.shape, dtype=bool)
    if missing_val is not None:
        valid &= arr != missing_val
    if extra_invalid is not None:
        valid &= ~extra_invalid
    if not np.any(valid):
        return None

    scale, offset = _calculate_scale_and_offset(arr[valid], convert_dtype)
    # np.where promotes integer input to float, so invalid cells can carry
    # NaN (assigning NaN into an integer array would raise).
    marked = np.where(valid, arr, np.nan)
    packed, fill_value = _data_to_scale_offset(marked, scale, offset, convert_dtype)
    return packed, float(scale), float(offset), fill_value


def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype="int16", scale_offset_switch=True, compile_switch=True, preserve_mask_values=True, missing_value=None):
    """Save data to a NetCDF file.

    Accepts ``xarray.DataArray``, ``xarray.Dataset`` or anything convertible
    to a NumPy array. Numeric data variables are optionally packed to an
    integer type through ``scale_factor``/``add_offset``; coordinate variables
    and non-numeric data are written unchanged. NaN, infinities and the
    declared missing value are all stored as the fill value of the packed type.

    Args:
        file: Output file path.
        data: ``xarray.DataArray``, ``xarray.Dataset`` or array-like.
        varname: Variable name for DataArray / array input. Defaults to the
            DataArray's name, or ``"data"`` when there is none.
        coords: Mapping ``dimension name -> coordinate values`` (array input
            only). The key order defines the dimension order of the variable.
        mode: ``"w"`` (overwrite) or ``"a"`` (append).
        convert_dtype: Packed integer type: ``"int8"``, ``"int16"``,
            ``"int32"`` or ``"int64"``. Defaults to ``"int16"``; any other
            value is replaced by ``"int32"``.
        scale_offset_switch: Pack numeric data variables when True.
        compile_switch: Enable zlib compression for packed variables.
        preserve_mask_values: Array input only. When True the values under a
            masked array's mask are written as ordinary data; when False they
            are treated as missing.
        missing_value: Array input only. Value to treat as missing. For
            xarray input the ``missing_value`` attribute of each variable is
            used instead.

    Raises:
        RuntimeError: If writing through netCDF4 fails (array input).
    """
    if convert_dtype not in ("int8", "int16", "int32", "int64"):
        convert_dtype = "int32"
    nc_dtype = _numpy_to_nc_type(convert_dtype)
    dropped_attrs = ("_FillValue", "missing_value")

    def packed_encoding(fill_value):
        # Encoding shared by every packed xarray variable.
        return {"zlib": compile_switch, "complevel": 4, "dtype": nc_dtype, "_FillValue": fill_value}

    # ------------------------------------------------------------------
    # xarray.DataArray
    if isinstance(data, xr.DataArray):
        if data.name is None:
            data = data.rename("data")
        if varname is None:
            varname = data.name
        arr = np.array(data.values)
        # Work on a filtered copy of the attributes: the caller's object must
        # not lose its fill-value attributes as a side effect of saving.
        attrs = {k: v for k, v in data.attrs.items() if k not in dropped_attrs}

        packed = None
        if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
            packed = _pack_for_netcdf(arr, data.attrs.get("missing_value"), convert_dtype)

        if packed is None:
            # Non-numeric data, packing disabled, or nothing valid to pack.
            plain = data.copy(deep=False)
            plain.attrs = attrs
            plain.to_dataset(name=varname).to_netcdf(file, mode=mode)
            return

        new_values, scale, offset, fill_value = packed
        new_da = data.copy(data=new_values)
        new_da.attrs = dict(attrs, scale_factor=scale, add_offset=offset)
        new_da.to_dataset(name=varname).to_netcdf(file, mode=mode, encoding={varname: packed_encoding(fill_value)})
        return

    # ------------------------------------------------------------------
    # xarray.Dataset
    if isinstance(data, xr.Dataset):
        new_vars = {}
        encoding = {}
        for var in data.data_vars:
            da = data[var]
            arr = np.array(da.values)
            attrs = {k: v for k, v in da.attrs.items() if k not in dropped_attrs}

            packed = None
            if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
                packed = _pack_for_netcdf(arr, da.attrs.get("missing_value"), convert_dtype)

            if packed is None:
                new_vars[var] = xr.DataArray(arr, dims=da.dims, coords=da.coords, attrs=attrs)
                continue

            new_values, scale, offset, fill_value = packed
            attrs["scale_factor"] = scale
            attrs["add_offset"] = offset
            new_vars[var] = xr.DataArray(new_values, dims=da.dims, coords=da.coords, attrs=attrs)
            encoding[var] = packed_encoding(fill_value)

        # Rebuild the dataset so the coordinate variables are carried over.
        new_ds = xr.Dataset(new_vars, coords=data.coords.copy())
        new_ds.to_netcdf(file, mode=mode, encoding=encoding if encoding else None)
        return

    # ------------------------------------------------------------------
    # Plain NumPy (or array-like) input
    if varname is None:
        varname = "data"

    if mode == "w" and os.path.exists(file):
        os.remove(file)
    elif mode == "a" and not os.path.exists(file):
        mode = "w"

    # Capture the mask before np.asarray() strips it.
    mask = None
    if isinstance(data, np.ma.MaskedArray) and not preserve_mask_values:
        mask = np.ma.getmaskarray(data)
    data = np.asarray(data)
    is_numeric = np.issubdtype(data.dtype, np.number)
    dims = list(coords.keys()) if coords else []

    try:
        with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
            if coords is not None:
                for dim, values in coords.items():
                    if dim not in ncfile.dimensions:
                        ncfile.createDimension(dim, len(values))
                        var_obj = ncfile.createVariable(dim, _numpy_to_nc_type(np.asarray(values).dtype), (dim,))
                        var_obj[:] = values

            if is_numeric and scale_offset_switch:
                packed = _pack_for_netcdf(data, missing_value, convert_dtype, mask)
                if packed is None:
                    # Nothing valid to pack: store the original type
                    # uncompressed, with NaN replaced by 0.
                    var = ncfile.createVariable(varname, _numpy_to_nc_type(data.dtype), dims, zlib=False)
                    var[:] = np.nan_to_num(data, nan=0.0)
                    return

                new_data, scale, offset, fill_value = packed
                var = ncfile.createVariable(varname, nc_dtype, dims, zlib=compile_switch, fill_value=fill_value)
                # The values are already packed; without this netCDF4 would
                # apply scale_factor/add_offset a second time on assignment.
                var.set_auto_maskandscale(False)
                var.scale_factor = scale
                var.add_offset = offset
                var[:] = new_data
            else:
                clean_data = data
                if is_numeric:
                    invalid = np.zeros(data.shape, dtype=bool)
                    if missing_value is not None:
                        invalid |= data == missing_value
                    if mask is not None:
                        invalid |= mask
                    if invalid.any():
                        # Integers cannot hold NaN: promote to float first.
                        if np.issubdtype(data.dtype, np.integer):
                            clean_data = data.astype(np.float64)
                        else:
                            clean_data = data.copy()
                        clean_data[invalid] = np.nan

                # Pick the variable type after any promotion above.
                var = ncfile.createVariable(varname, _numpy_to_nc_type(clean_data.dtype), dims, zlib=False)
                var[:] = clean_data
    except Exception as e:
        raise RuntimeError(f"netCDF4 保存失败: {str(e)}") from e
361
+
362
+
363
+
364
+
365
# Manual smoke tests
if __name__ == "__main__":
    # Example input path; adjust it to an existing file before running.
    file = "dataset_test.nc"
    outfile = "dataset_test_compressed.nc"
    # The context manager closes the dataset even if saving fails.
    with xr.open_dataset(file) as ds:
        save_to_nc(outfile, ds)

    # DataArray input
    data = np.random.rand(4, 3, 2)
    coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
    varname = "test_var"
    data = xr.DataArray(data, dims=("x", "y", "z"), coords=coords, name=varname)
    outfile = "test_dataarray.nc"
    save_to_nc(outfile, data)

    # NumPy array with a custom missing value
    coords = {"dim0": np.arange(5)}
    data = np.array([1, 2, -999, 4, np.nan])
    save_to_nc("test_numpy_missing.nc", data, varname="data", coords=coords, missing_value=-999)