oafuncs 0.0.98.30__tar.gz → 0.0.98.55__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oafuncs-0.0.98.30/oafuncs.egg-info → oafuncs-0.0.98.55}/PKG-INFO +10 -3
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/README.md +8 -2
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/__init__.py +5 -1
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/cprogressbar.py +5 -2
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/data_interp.py +0 -15
- oafuncs-0.0.98.55/oafuncs/_script/email.py +48 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/netcdf_modify.py +0 -15
- oafuncs-0.0.98.55/oafuncs/_script/netcdf_write.py +385 -0
- oafuncs-0.0.98.55/oafuncs/_script/parallel.py +269 -0
- oafuncs-0.0.98.30/oafuncs/_script/parallel.py → oafuncs-0.0.98.55/oafuncs/_script/parallel_bak.py +1 -1
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/plot_dataset.py +9 -4
- oafuncs-0.0.98.55/oafuncs/_script/process_roms.py +620 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/replace_file_content.py +2 -17
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_cmap.py +27 -2
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_data.py +3 -45
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_date.py +58 -54
- oafuncs-0.0.98.55/oafuncs/oa_down/__init__.py +8 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/hycom_3hourly.py +30 -7
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/idm.py +0 -17
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/literature.py +141 -55
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/read_proxy.py +0 -15
- oafuncs-0.0.98.55/oafuncs/oa_down/user_agent.py +17 -0
- oafuncs-0.0.98.55/oafuncs/oa_draw.py +461 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_file.py +44 -21
- oafuncs-0.0.98.55/oafuncs/oa_geo.py +198 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_help.py +0 -15
- oafuncs-0.0.98.55/oafuncs/oa_linux.py +193 -0
- oafuncs-0.0.98.55/oafuncs/oa_model/roms.py +42 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_nc.py +38 -10
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_python.py +0 -15
- oafuncs-0.0.98.55/oafuncs/oa_sign/__init__.py +6 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_sign/meteorological.py +0 -16
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_sign/ocean.py +0 -16
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_sign/scientific.py +0 -16
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_tool.py +5 -5
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55/oafuncs.egg-info}/PKG-INFO +10 -3
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/SOURCES.txt +5 -4
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/requires.txt +1 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/setup.py +9 -4
- oafuncs-0.0.98.30/oafuncs/_script/email.py +0 -116
- oafuncs-0.0.98.30/oafuncs/_script/netcdf_write.py +0 -467
- oafuncs-0.0.98.30/oafuncs/_script/parallel_test.py +0 -14
- oafuncs-0.0.98.30/oafuncs/oa_down/__init__.py +0 -22
- oafuncs-0.0.98.30/oafuncs/oa_down/hycom_3hourly_proxy.py +0 -1230
- oafuncs-0.0.98.30/oafuncs/oa_down/user_agent.py +0 -31
- oafuncs-0.0.98.30/oafuncs/oa_draw.py +0 -401
- oafuncs-0.0.98.30/oafuncs/oa_model/roms/__init__.py +0 -20
- oafuncs-0.0.98.30/oafuncs/oa_model/roms/test.py +0 -19
- oafuncs-0.0.98.30/oafuncs/oa_sign/__init__.py +0 -21
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/LICENSE.txt +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/MANIFEST.in +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_data/hycom.png +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_data/oafuncs.png +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/_script/netcdf_merge.py +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/User_Agent-list.txt +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_down/test_ua.py +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_model/__init__.py +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_model/wrf/__init__.py +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs/oa_model/wrf/little_r.py +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/dependency_links.txt +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/oafuncs.egg-info/top_level.txt +0 -0
- {oafuncs-0.0.98.30 → oafuncs-0.0.98.55}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: oafuncs
|
|
3
|
-
Version: 0.0.98.30
|
|
3
|
+
Version: 0.0.98.55
|
|
4
4
|
Summary: Oceanic and Atmospheric Functions
|
|
5
5
|
Home-page: https://github.com/Industry-Pays/OAFuncs
|
|
6
6
|
Author: Kun Liu
|
|
@@ -24,6 +24,7 @@ Requires-Dist: pandas
|
|
|
24
24
|
Requires-Dist: xarray
|
|
25
25
|
Requires-Dist: rich
|
|
26
26
|
Requires-Dist: pathlib
|
|
27
|
+
Requires-Dist: lxml
|
|
27
28
|
Requires-Dist: requests
|
|
28
29
|
Requires-Dist: bs4
|
|
29
30
|
Requires-Dist: httpx
|
|
@@ -64,8 +65,6 @@ Just for the convenience of daily use, some complex operations are integrated in
|
|
|
64
65
|
The code will be optimized and updated from time to time, with additions, deletions, or modifications…
|
|
65
66
|
|
|
66
67
|
Existing functions will not be completely removed, they might just have a different function name, or the parameter passing might have been optimized…
|
|
67
|
-
|
|
68
|
-
Note: If there are any requirements, you can email to liukun0312@stu.ouc.edu.cn. Within my capabilities, I can consider implementing them.
|
|
69
68
|
```
|
|
70
69
|
|
|
71
70
|
## PyPI
|
|
@@ -80,6 +79,11 @@ https://pypi.org/project/oafuncs
|
|
|
80
79
|
https://github.com/Industry-Pays/OAFuncs
|
|
81
80
|
```
|
|
82
81
|
|
|
82
|
+
## Download information
|
|
83
|
+
```html
|
|
84
|
+
https://pypistats.org/packages/oafuncs
|
|
85
|
+
```
|
|
86
|
+
|
|
83
87
|
## Example
|
|
84
88
|
|
|
85
89
|
```python
|
|
@@ -181,3 +185,6 @@ query()
|
|
|
181
185
|
<img title="" src="./oafuncs/data_store/OAFuncs.png" alt="">
|
|
182
186
|
|
|
183
187
|
<img title="OAFuncs" src="https://raw.githubusercontent.com/Industry-Pays/OAFuncs/main/oafuncs/_data/oafuncs.png" alt="OAFuncs">
|
|
188
|
+
|
|
189
|
+
## Wiki
|
|
190
|
+
更多内容,查看[wiki_old](https://opendeep.wiki/Industry-Pays/OAFuncs/introduction) or [wiki_new](https://deepwiki.com/Industry-Pays/OAFuncs)
|
|
@@ -12,8 +12,6 @@ Just for the convenience of daily use, some complex operations are integrated in
|
|
|
12
12
|
The code will be optimized and updated from time to time, with additions, deletions, or modifications…
|
|
13
13
|
|
|
14
14
|
Existing functions will not be completely removed, they might just have a different function name, or the parameter passing might have been optimized…
|
|
15
|
-
|
|
16
|
-
Note: If there are any requirements, you can email to liukun0312@stu.ouc.edu.cn. Within my capabilities, I can consider implementing them.
|
|
17
15
|
```
|
|
18
16
|
|
|
19
17
|
## PyPI
|
|
@@ -28,6 +26,11 @@ https://pypi.org/project/oafuncs
|
|
|
28
26
|
https://github.com/Industry-Pays/OAFuncs
|
|
29
27
|
```
|
|
30
28
|
|
|
29
|
+
## Download information
|
|
30
|
+
```html
|
|
31
|
+
https://pypistats.org/packages/oafuncs
|
|
32
|
+
```
|
|
33
|
+
|
|
31
34
|
## Example
|
|
32
35
|
|
|
33
36
|
```python
|
|
@@ -129,3 +132,6 @@ query()
|
|
|
129
132
|
<img title="" src="./oafuncs/data_store/OAFuncs.png" alt="">
|
|
130
133
|
|
|
131
134
|
<img title="OAFuncs" src="https://raw.githubusercontent.com/Industry-Pays/OAFuncs/main/oafuncs/_data/oafuncs.png" alt="OAFuncs">
|
|
135
|
+
|
|
136
|
+
## Wiki
|
|
137
|
+
更多内容,查看[wiki_old](https://opendeep.wiki/Industry-Pays/OAFuncs/introduction) or [wiki_new](https://deepwiki.com/Industry-Pays/OAFuncs)
|
|
@@ -40,4 +40,8 @@ from .oa_tool import *
|
|
|
40
40
|
# from ._script import *
|
|
41
41
|
# ------------------- 2025-03-16 15:56:01 -------------------
|
|
42
42
|
from .oa_date import *
|
|
43
|
-
# ------------------- 2025-03-27 16:56:57 -------------------
|
|
43
|
+
# ------------------- 2025-03-27 16:56:57 -------------------
|
|
44
|
+
from .oa_geo import *
|
|
45
|
+
# ------------------- 2025-09-04 14:08:26 -------------------
|
|
46
|
+
from .oa_linux import *
|
|
47
|
+
# ------------------- 2025-09-14 12:30:00 -------------------
|
|
@@ -189,7 +189,8 @@ class ColorProgressBar:
|
|
|
189
189
|
self._is_jupyter = "ipykernel" in sys.modules
|
|
190
190
|
|
|
191
191
|
# 输出样式
|
|
192
|
-
filled_list = ["▊", "█", "▓", "▒", "░", "#", "=", ">", "▌", "▍", "▎", "▏", "*"]
|
|
192
|
+
# filled_list = ["▊", "█", "▓", "▒", "░", "#", "=", ">", "▌", "▍", "▎", "▏", "*"]
|
|
193
|
+
filled_list = ["█", "▓", "▒", "░", "#", "=", ">", "*"]
|
|
193
194
|
self.filled = random.choice(filled_list)
|
|
194
195
|
|
|
195
196
|
def _generate_gradient(self) -> Optional[List[str]]:
|
|
@@ -300,11 +301,13 @@ class ColorProgressBar:
|
|
|
300
301
|
# 获取终端宽度
|
|
301
302
|
try:
|
|
302
303
|
term_width = self.bar_length or (shutil.get_terminal_size().columns if self._is_terminal else 80)
|
|
304
|
+
# print(f'Terminal width: {term_width}') # 调试输出
|
|
303
305
|
except (AttributeError, OSError):
|
|
304
306
|
term_width = 80 # 默认终端宽度
|
|
305
307
|
|
|
306
308
|
# 确保有效宽度不小于最低限制
|
|
307
|
-
effective_width = max(15, term_width - 40)
|
|
309
|
+
# effective_width = max(15, term_width - 40)
|
|
310
|
+
effective_width = max(15, int(term_width * 0.6)) # 保留40个字符用于其他信息
|
|
308
311
|
if effective_width < 10:
|
|
309
312
|
warnings.warn("Terminal width is too small for proper progress bar rendering.")
|
|
310
313
|
effective_width = 10 # 设置最低宽度限制
|
|
@@ -1,18 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# coding=utf-8
|
|
3
|
-
"""
|
|
4
|
-
Author: Liu Kun && 16031215@qq.com
|
|
5
|
-
Date: 2025-04-25 16:22:52
|
|
6
|
-
LastEditors: Liu Kun && 16031215@qq.com
|
|
7
|
-
LastEditTime: 2025-04-26 19:21:31
|
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\data_interp.py
|
|
9
|
-
Description:
|
|
10
|
-
EditPlatform: vscode
|
|
11
|
-
ComputerInfo: XPS 15 9510
|
|
12
|
-
SystemInfo: Windows 11
|
|
13
|
-
Python Version: 3.12
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
1
|
from typing import List, Union
|
|
17
2
|
|
|
18
3
|
import numpy as np
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from rich import print
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def _send_message(msg_from, password, msg_to, title, content):
    """Send a plain-text e-mail through the QQ Mail SMTP-over-SSL endpoint.

    Args:
        msg_from: Sender address; also used as the SMTP login name.
        password: Credential passed to ``SMTP.login`` together with ``msg_from``.
        msg_to: Recipient address, passed unchanged to ``SMTP.sendmail``.
        title: Subject line; encoded as a UTF-8 header.
        content: Message body, attached as UTF-8 ``text/plain``.

    Raises:
        smtplib.SMTPException: If login or delivery is rejected by the server.
        OSError: If the connection to the server cannot be established.
    """
    import smtplib
    from email.header import Header
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    # Build the complete message before opening the connection so that a
    # problem with the inputs never leaves a socket open.
    msg = MIMEMultipart()
    msg["Subject"] = Header(title, "utf-8").encode()
    msg["From"] = msg_from
    msg["To"] = msg_to
    msg.attach(MIMEText(content, "plain", "utf-8"))

    # The context manager issues QUIT and closes the socket even when
    # login() or sendmail() raises, which the manual quit() call did not.
    with smtplib.SMTP_SSL("smtp.qq.com", 465) as con:
        con.login(msg_from, password)
        con.sendmail(msg_from, msg_to, msg.as_string())

    print(f"已通过{msg_from}成功向{msg_to}发送邮件!")
    print("发送内容为:\n{}\n\n".format(content))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
if __name__ == "__main__":
    # Placeholder: running this module directly does nothing.
    pass
|
|
@@ -1,18 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# coding=utf-8
|
|
3
|
-
"""
|
|
4
|
-
Author: Liu Kun && 16031215@qq.com
|
|
5
|
-
Date: 2025-04-05 14:00:50
|
|
6
|
-
LastEditors: Liu Kun && 16031215@qq.com
|
|
7
|
-
LastEditTime: 2025-04-05 14:00:50
|
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\_script\\netcdf_modify.py
|
|
9
|
-
Description:
|
|
10
|
-
EditPlatform: vscode
|
|
11
|
-
ComputerInfo: XPS 15 9510
|
|
12
|
-
SystemInfo: Windows 11
|
|
13
|
-
Python Version: 3.12
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
1
|
import os
|
|
17
2
|
|
|
18
3
|
import netCDF4 as nc
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import warnings
|
|
3
|
+
import numpy as np
|
|
4
|
+
import xarray as xr
|
|
5
|
+
import netCDF4 as nc
|
|
6
|
+
|
|
7
|
+
# NOTE(review): installed at import time with no module filter, so every
# RuntimeWarning (including NumPy overflow / invalid-value warnings) is hidden
# for the whole process once this module has been imported.
warnings.filterwarnings("ignore", category=RuntimeWarning)
|
|
8
|
+
|
|
9
|
+
def _get_dtype_info(dtype):
    """Look up the NumPy type and value limits for an integer type name.

    Args:
        dtype: One of the strings "int8", "int16", "int32" or "int64".

    Returns:
        A tuple ``(numpy_type, clip_min, clip_max)`` where the limits are the
        smallest and largest values representable by ``numpy_type``.

    Raises:
        ValueError: If ``dtype`` is not one of the four supported names.
    """
    limits_by_name = {
        name: np.iinfo(getattr(np, name))
        for name in ("int8", "int16", "int32", "int64")
    }

    limits = limits_by_name.get(dtype)
    if limits is None:
        raise ValueError(f"Unsupported dtype: {dtype}. Supported types are 'int8', 'int16', 'int32', and 'int64'.")

    return getattr(np, dtype), limits.min, limits.max
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _numpy_to_nc_type(numpy_type):
    """Map a NumPy data type to the matching NetCDF type code.

    Args:
        numpy_type: A dtype name (``"int16"``), a ``numpy.dtype`` instance, a
            NumPy scalar type (``np.int16``) or any other dtype specifier
            understood by ``numpy.dtype``.

    Returns:
        The NetCDF type code (for example ``"i2"`` or ``"f8"``). Types without
        an entry in the table, and inputs that are not valid dtype specifiers,
        fall back to ``"f4"``.
    """
    numpy_to_nc = {
        "float32": "f4",
        "float64": "f8",
        "int8": "i1",
        "int16": "i2",
        "int32": "i4",
        "int64": "i8",
        "uint8": "u1",
        "uint16": "u2",
        "uint32": "u4",
        "uint64": "u8",
    }
    if numpy_type is None:
        # numpy.dtype(None) means float64; keep the historical fallback instead.
        return "f4"

    # Normalise through numpy.dtype so that scalar type classes (np.int16) and
    # aliases ("i2", "<f8") resolve to their canonical name; str() on a type
    # class yields "<class 'numpy.int16'>" and used to fall through to "f4".
    try:
        type_name = np.dtype(numpy_type).name
    except (TypeError, ValueError):
        type_name = str(numpy_type)
    return numpy_to_nc.get(type_name, "f4")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _calculate_scale_and_offset(data, dtype="int32"):
    """Derive ``scale_factor`` and ``add_offset`` for integer packing.

    Only finite, unmasked elements of ``data`` are considered. The offset is
    the midpoint of the valid range and the scale maps the largest deviation
    from that midpoint onto the usable integer range; the smallest integer of
    the target type is left out of that range.

    Args:
        data: NumPy array (plain or masked) holding the values to pack.
        dtype: Target integer type name accepted by ``_get_dtype_info``.

    Returns:
        ``(scale_factor, add_offset)`` as Python floats. ``(1.0, 0.0)`` is
        returned when there is no valid element, and ``(1.0, value)`` when all
        valid elements are equal.

    Raises:
        ValueError: If ``data`` is not a NumPy array or ``dtype`` is unsupported.
    """
    if not isinstance(data, np.ndarray):
        raise ValueError("Input data must be a NumPy array.")

    _, clip_min, clip_max = _get_dtype_info(dtype)

    # Valid elements are finite and, for masked arrays, not masked.
    valid_mask = np.isfinite(data)
    if hasattr(data, "mask") and np.ma.is_masked(data):
        valid_mask &= ~data.mask

    if not np.any(valid_mask):
        return 1.0, 0.0

    valid = data[valid_mask]
    # Convert to Python floats before doing arithmetic: adding the extrema in
    # the array's own dtype wraps around for small integer types (int8 100 + 60)
    # and can overflow for float32.
    data_min = float(np.min(valid))
    data_max = float(np.max(valid))

    # A constant field would give a zero scale; use a neutral scale instead.
    if data_max == data_min:
        return 1.0, data_min

    # Halve before adding so the midpoint cannot overflow.
    add_offset = data_min / 2.0 + data_max / 2.0
    max_deviation = max(abs(data_max - add_offset), abs(data_min - add_offset))

    # Usable magnitude of the integer range with its minimum left out.
    available_range = min(abs(clip_min + 1), abs(clip_max))
    scale_factor = max_deviation / available_range

    return scale_factor, add_offset
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _data_to_scale_offset(data, scale, offset, dtype="int32"):
    """Pack ``data`` into integers using ``(value - offset) / scale``.

    NaN, infinite and masked elements are written as the smallest value of the
    target integer type, which is returned as the fill value. Valid elements
    are rounded and limited to ``[clip_min + 1, clip_max]`` so they can never
    collide with the fill value.

    Args:
        data: NumPy array (plain or masked) to pack.
        scale: Scale factor used for packing.
        offset: Offset subtracted before scaling.
        dtype: Target integer type name accepted by ``_get_dtype_info``.

    Returns:
        ``(packed, fill_value)`` where ``packed`` has the shape of ``data`` and
        the requested integer dtype.

    Raises:
        ValueError: If ``data`` is not a NumPy array or ``dtype`` is unsupported.
    """
    if not isinstance(data, np.ndarray):
        raise ValueError("Input data must be a NumPy array.")

    np_dtype, clip_min, clip_max = _get_dtype_info(dtype)
    fill_value = clip_min  # the type's minimum marks invalid elements

    # Start from an all-fill array and overwrite only the valid elements.
    result = np.full(data.shape, fill_value, dtype=np_dtype)

    valid_mask = np.isfinite(data)
    if hasattr(data, "mask") and np.ma.is_masked(data):
        valid_mask &= ~data.mask

    if np.any(valid_mask):
        # Float limits that are guaranteed to cast back inside the integer
        # range; for int64 the nearest float64 to a limit lies outside it, so
        # step one representable value towards zero.
        lower = float(clip_min + 1)
        if int(lower) < clip_min + 1:
            lower = float(np.nextafter(lower, 0.0))
        upper = float(clip_max)
        if int(upper) > clip_max:
            upper = float(np.nextafter(upper, 0.0))

        scaled = (np.asarray(data[valid_mask], dtype=np.float64) - offset) / scale
        # Clip while still in floating point: casting first lets out-of-range
        # values wrap around, after which clipping can no longer repair them.
        scaled = np.clip(np.round(scaled), lower, upper)
        result[valid_mask] = scaled.astype(np_dtype)

    return result, fill_value
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _pack_numeric_array(arr, missing, convert_dtype, extra_invalid=None):
    """Pack a numeric array into integers via scale/offset encoding.

    Args:
        arr: Plain NumPy array with a numeric dtype.
        missing: Sentinel value to treat as missing, or ``None``.
        convert_dtype: Target integer type name passed to the packing helpers.
        extra_invalid: Optional boolean array flagging further elements that
            must be stored as missing.

    Returns:
        ``None`` when no element is valid; otherwise the tuple
        ``(packed, fill_value, scale, offset)``.
    """
    valid = np.isfinite(arr)
    if missing is not None:
        valid &= arr != missing
    if extra_invalid is not None:
        valid &= ~extra_invalid
    if not np.any(valid):
        return None

    scale, offset = _calculate_scale_and_offset(arr[valid], convert_dtype)

    # Integer arrays cannot hold NaN, so mark invalid cells on a float copy.
    work = arr.astype(np.float64) if arr.dtype.kind in "iu" else arr.copy()
    work[~valid] = np.nan
    packed, fill_value = _data_to_scale_offset(work, scale, offset, convert_dtype)
    return packed, fill_value, scale, offset


def save_to_nc(file, data, varname=None, coords=None, mode="w", convert_dtype='int16', scale_offset_switch=True, compile_switch=True, preserve_mask_values=True, missing_value=None):
    """Save an xarray object or a NumPy array to a NetCDF file.

    Numeric data variables are optionally packed to an integer type with
    ``scale_factor`` / ``add_offset`` attributes; NaN, infinite and missing
    values are stored as the packed variable's ``_FillValue``. Non-numeric
    variables and all coordinate variables are written unpacked.

    Args:
        file: Path of the output file.
        data: ``xarray.DataArray``, ``xarray.Dataset`` or array-like.
        varname: Variable name for a DataArray or array input. Defaults to the
            DataArray's name, or ``"data"`` when there is none.
        coords: Mapping of dimension name to coordinate values; used only for
            array input, where its keys also give the variable's dimensions.
        mode: ``"w"`` to overwrite or ``"a"`` to append.
        convert_dtype: Packed integer type: "int8", "int16", "int32" or
            "int64". Defaults to "int16"; any other value is replaced by
            "int32".
        scale_offset_switch: Whether numeric data variables are packed.
        compile_switch: Whether zlib compression is enabled for packed variables.
        preserve_mask_values: Array input only. When False, masked elements of
            a ``numpy.ma.MaskedArray`` are stored as missing; when True their
            underlying values are written.
        missing_value: Array input only. Sentinel value stored as missing. For
            xarray input the variable's ``missing_value`` attribute is used.

    Raises:
        RuntimeError: If writing array input through netCDF4 fails.
    """
    if convert_dtype not in ["int8", "int16", "int32", "int64"]:
        convert_dtype = "int32"
    nc_dtype = _numpy_to_nc_type(convert_dtype)
    # These attributes would conflict with the fill value chosen on output.
    dropped_attrs = ("_FillValue", "missing_value")

    # ------------------------------------------------------------------
    # xarray.DataArray
    if isinstance(data, xr.DataArray):
        if data.name is None:
            data = data.rename("data")
        varname = data.name if varname is None else varname
        arr = np.array(data.values)
        kept_attrs = {k: v for k, v in data.attrs.items() if k not in dropped_attrs}

        packed = None
        if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
            packed = _pack_numeric_array(arr, data.attrs.get("missing_value", None), convert_dtype)

        if packed is None:
            # Non-numeric data, packing disabled, or nothing valid to pack:
            # write the values unchanged. Work on a shallow copy so the
            # caller's attrs are not modified.
            plain = data.copy(deep=False)
            plain.attrs = kept_attrs
            plain.to_dataset(name=varname).to_netcdf(file, mode=mode)
            return

        new_values, fill_value, scale, offset = packed
        new_da = data.copy(data=new_values)
        new_attrs = dict(kept_attrs)
        new_attrs["scale_factor"] = float(scale)
        new_attrs["add_offset"] = float(offset)
        new_da.attrs = new_attrs

        encoding = {
            varname: {
                "zlib": compile_switch,
                "complevel": 4,
                "dtype": nc_dtype,
                "_FillValue": fill_value,
            }
        }
        new_da.to_dataset(name=varname).to_netcdf(file, mode=mode, encoding=encoding)
        return

    # ------------------------------------------------------------------
    # xarray.Dataset
    if isinstance(data, xr.Dataset):
        new_vars = {}
        encoding = {}
        for var in data.data_vars:
            da = data[var]
            arr = np.array(da.values)
            attrs = {k: v for k, v in da.attrs.items() if k not in dropped_attrs}

            packed = None
            if np.issubdtype(arr.dtype, np.number) and scale_offset_switch:
                packed = _pack_numeric_array(arr, da.attrs.get("missing_value", None), convert_dtype)

            if packed is None:
                # Stored as-is, without packing or an encoding entry.
                new_vars[var] = xr.DataArray(arr, dims=da.dims, coords=da.coords, attrs=attrs)
                continue

            new_values, fill_value, scale, offset = packed
            new_da = xr.DataArray(new_values, dims=da.dims, coords=da.coords, attrs=attrs)
            new_da.attrs["scale_factor"] = float(scale)
            new_da.attrs["add_offset"] = float(offset)
            new_vars[var] = new_da
            encoding[var] = {
                "zlib": compile_switch,
                "complevel": 4,
                "dtype": nc_dtype,
                "_FillValue": fill_value,
            }

        # Carry the coordinate variables over unchanged.
        new_ds = xr.Dataset(new_vars, coords=data.coords.copy())
        new_ds.to_netcdf(file, mode=mode, encoding=encoding if encoding else None)
        return

    # ------------------------------------------------------------------
    # Plain array input, written through netCDF4 directly
    if mode == "w" and os.path.exists(file):
        os.remove(file)
    elif mode == "a" and not os.path.exists(file):
        mode = "w"

    # Capture the mask first: np.asarray() returns the bare data of a masked
    # array, after which the mask can no longer be recovered.
    masked_cells = np.ma.getmaskarray(data) if np.ma.isMaskedArray(data) else None
    data = np.asarray(data)
    is_numeric = np.issubdtype(data.dtype, np.number)
    if varname is None:
        varname = "data"
    drop_mask = masked_cells if (masked_cells is not None and not preserve_mask_values) else None

    try:
        with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
            if coords is not None:
                for dim, values in coords.items():
                    # In append mode an existing dimension is reused as-is.
                    if dim not in ncfile.dimensions:
                        ncfile.createDimension(dim, len(values))
                        var_obj = ncfile.createVariable(dim, _numpy_to_nc_type(np.asarray(values).dtype), (dim,))
                        var_obj[:] = values

            dims = list(coords.keys()) if coords else []

            if is_numeric and scale_offset_switch:
                packed = _pack_numeric_array(data, missing_value, convert_dtype, drop_mask)
                if packed is None:
                    # Nothing valid to derive a scale from: store the original
                    # dtype unpacked, with NaN replaced by 0.
                    var = ncfile.createVariable(varname, _numpy_to_nc_type(data.dtype), dims, zlib=False)
                    var[:] = np.nan_to_num(data, nan=0.0)
                    return

                new_data, fill_value, scale, offset = packed
                var = ncfile.createVariable(varname, nc_dtype, dims, zlib=compile_switch, fill_value=fill_value)
                # The values are already packed. With netCDF4's automatic
                # scaling left on, assigning to a variable that carries
                # scale_factor/add_offset packs the data a second time, so
                # switch it off and write the raw integers.
                var.set_auto_maskandscale(False)
                var[:] = new_data
                var.scale_factor = float(scale)
                var.add_offset = float(offset)
            else:
                # Unpacked output; missing (and optionally masked) cells become NaN.
                clean_data = data.copy()
                invalid = np.zeros(data.shape, dtype=bool)
                if missing_value is not None:
                    invalid |= data == missing_value
                if drop_mask is not None:
                    invalid |= drop_mask
                if np.any(invalid):
                    # NaN needs a floating dtype; integer data is promoted.
                    if not np.issubdtype(clean_data.dtype, np.floating):
                        clean_data = clean_data.astype(float)
                    clean_data[invalid] = np.nan

                var = ncfile.createVariable(varname, _numpy_to_nc_type(clean_data.dtype), dims, zlib=False)
                var[:] = clean_data
    except Exception as e:
        raise RuntimeError(f"netCDF4 保存失败: {str(e)}") from e
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
# 测试用例
|
|
366
|
+
if __name__ == "__main__":
    # Demo 1: repack an existing dataset. The input path is an example and
    # must point to a real file. The context manager closes the dataset even
    # if saving fails.
    file = "dataset_test.nc"
    outfile = "dataset_test_compressed.nc"
    with xr.open_dataset(file) as ds:
        save_to_nc(outfile, ds)

    # Demo 2: a named DataArray with three coordinates.
    data = np.random.rand(4, 3, 2)
    coords = {"x": np.arange(4), "y": np.arange(3), "z": np.arange(2)}
    varname = "test_var"
    data = xr.DataArray(data, dims=("x", "y", "z"), coords=coords, name=varname)
    outfile = "test_dataarray.nc"
    save_to_nc(outfile, data)

    # Demo 3: a plain NumPy array with a custom missing value and a NaN.
    coords = {"dim0": np.arange(5)}
    data = np.array([1, 2, -999, 4, np.nan])
    save_to_nc("test_numpy_missing.nc", data, varname="data", coords=coords, missing_value=-999)
|