oafuncs 0.0.92__py2.py3-none-any.whl → 0.0.94__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_data.py +22 -147
- oafuncs/oa_down/hycom_3hourly.py +92 -101
- oafuncs/oa_down/hycom_3hourly_20250129.py +1307 -0
- oafuncs/oa_down/literature.py +7 -7
- oafuncs/oa_file.py +6 -2
- oafuncs/oa_nc.py +46 -28
- {oafuncs-0.0.92.dist-info → oafuncs-0.0.94.dist-info}/METADATA +1 -1
- {oafuncs-0.0.92.dist-info → oafuncs-0.0.94.dist-info}/RECORD +11 -10
- {oafuncs-0.0.92.dist-info → oafuncs-0.0.94.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.92.dist-info → oafuncs-0.0.94.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.92.dist-info → oafuncs-0.0.94.dist-info}/top_level.txt +0 -0
oafuncs/oa_data.py
CHANGED
@@ -18,11 +18,31 @@ import multiprocessing as mp
|
|
18
18
|
from concurrent.futures import ThreadPoolExecutor
|
19
19
|
|
20
20
|
import numpy as np
|
21
|
-
from rich import print
|
22
21
|
from scipy.interpolate import griddata
|
23
22
|
|
24
23
|
|
25
|
-
__all__ = ["interp_2d"]
|
24
|
+
__all__ = ["interp_2d", "ensure_list"]
|
25
|
+
|
26
|
+
|
27
|
+
def ensure_list(input_data):
|
28
|
+
"""
|
29
|
+
Ensures that the input is converted into a list.
|
30
|
+
|
31
|
+
If the input is already a list, it returns it directly.
|
32
|
+
If the input is a string, it wraps it in a list and returns.
|
33
|
+
For other types of input, it converts them to a string and then wraps in a list.
|
34
|
+
|
35
|
+
:param input_data: The input which can be a list, a string, or any other type.
|
36
|
+
:return: A list containing the input or the string representation of the input.
|
37
|
+
"""
|
38
|
+
if isinstance(input_data, list):
|
39
|
+
return input_data
|
40
|
+
elif isinstance(input_data, str):
|
41
|
+
return [input_data]
|
42
|
+
else:
|
43
|
+
# For non-list and non-string inputs, convert to string and wrap in a list
|
44
|
+
return [str(input_data)]
|
45
|
+
|
26
46
|
|
27
47
|
|
28
48
|
def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", parallel=True):
|
@@ -93,151 +113,6 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method="linear", par
|
|
93
113
|
|
94
114
|
|
95
115
|
|
96
|
-
|
97
|
-
|
98
|
-
# ---------------------------------------------------------------------------------- not used below ----------------------------------------------------------------------------------
|
99
|
-
# ** 高维插值函数,插值最后两个维度
|
100
|
-
def interp_2d_20241213(target_x, target_y, origin_x, origin_y, data, method="linear"):
|
101
|
-
"""
|
102
|
-
高维插值函数,默认插值最后两个维度,传输数据前请确保数据的维度正确
|
103
|
-
参数:
|
104
|
-
target_y (array-like): 目标经度网格 1D 或 2D
|
105
|
-
target_x (array-like): 目标纬度网格 1D 或 2D
|
106
|
-
origin_y (array-like): 初始经度网格 1D 或 2D
|
107
|
-
origin_x (array-like): 初始纬度网格 1D 或 2D
|
108
|
-
data (array-like): 数据 (*, lat, lon) 2D, 3D, 4D
|
109
|
-
method (str, optional): 插值方法,可选 'linear', 'nearest', 'cubic' 等,默认为 'linear'
|
110
|
-
返回:
|
111
|
-
array-like: 插值结果
|
112
|
-
"""
|
113
|
-
|
114
|
-
# 确保目标网格和初始网格都是二维的
|
115
|
-
if len(target_y.shape) == 1:
|
116
|
-
target_x, target_y = np.meshgrid(target_x, target_y)
|
117
|
-
if len(origin_y.shape) == 1:
|
118
|
-
origin_x, origin_y = np.meshgrid(origin_x, origin_y)
|
119
|
-
|
120
|
-
dims = data.shape
|
121
|
-
len_dims = len(dims)
|
122
|
-
# print(dims[-2:])
|
123
|
-
# 根据经纬度网格判断输入数据的形状是否匹配
|
124
|
-
|
125
|
-
if origin_x.shape != dims[-2:] or origin_y.shape != dims[-2:]:
|
126
|
-
print(origin_x.shape, dims[-2:])
|
127
|
-
raise ValueError("Shape of data does not match shape of origin_x or origin_y.")
|
128
|
-
|
129
|
-
# 将目标网格展平成一维数组
|
130
|
-
target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
|
131
|
-
|
132
|
-
# 将初始网格展平成一维数组
|
133
|
-
origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
|
134
|
-
|
135
|
-
# 进行插值
|
136
|
-
if len_dims == 2:
|
137
|
-
interpolated_data = griddata(origin_points, np.ravel(data), target_points, method=method)
|
138
|
-
interpolated_data = np.reshape(interpolated_data, target_y.shape)
|
139
|
-
elif len_dims == 3:
|
140
|
-
interpolated_data = []
|
141
|
-
for i in range(dims[0]):
|
142
|
-
dt = griddata(origin_points, np.ravel(data[i, :, :]), target_points, method=method)
|
143
|
-
interpolated_data.append(np.reshape(dt, target_y.shape))
|
144
|
-
print(f"Interpolating {i + 1}/{dims[0]}...")
|
145
|
-
interpolated_data = np.array(interpolated_data)
|
146
|
-
elif len_dims == 4:
|
147
|
-
interpolated_data = []
|
148
|
-
for i in range(dims[0]):
|
149
|
-
interpolated_data.append([])
|
150
|
-
for j in range(dims[1]):
|
151
|
-
dt = griddata(origin_points, np.ravel(data[i, j, :, :]), target_points, method=method)
|
152
|
-
interpolated_data[i].append(np.reshape(dt, target_y.shape))
|
153
|
-
print(f"\rInterpolating {i * dims[1] + j + 1}/{dims[0] * dims[1]}...", end="")
|
154
|
-
print("\n")
|
155
|
-
interpolated_data = np.array(interpolated_data)
|
156
|
-
|
157
|
-
return interpolated_data
|
158
|
-
|
159
|
-
|
160
|
-
# ** 高维插值函数,插值最后两个维度,使用多线程进行插值
|
161
|
-
# 在本地电脑上可以提速三倍左右,超算上暂时无法加速
|
162
|
-
def interp_2d_parallel_20241213(target_x, target_y, origin_x, origin_y, data, method="linear"):
|
163
|
-
"""
|
164
|
-
param {*} target_x 目标经度网格 1D 或 2D
|
165
|
-
param {*} target_y 目标纬度网格 1D 或 2D
|
166
|
-
param {*} origin_x 初始经度网格 1D 或 2D
|
167
|
-
param {*} origin_y 初始纬度网格 1D 或 2D
|
168
|
-
param {*} data 数据 (*, lat, lon) 2D, 3D, 4D
|
169
|
-
param {*} method 插值方法,可选 'linear', 'nearest', 'cubic' 等,默认为 'linear'
|
170
|
-
return {*} 插值结果
|
171
|
-
description : 高维插值函数,默认插值最后两个维度,传输数据前请确保数据的维度正确
|
172
|
-
example : interpolated_data = interp_2d_parallel(target_x, target_y, origin_x, origin_y, data, method='linear')
|
173
|
-
"""
|
174
|
-
|
175
|
-
def interp_single2d(target_y, target_x, origin_y, origin_x, data, method="linear"):
|
176
|
-
target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
|
177
|
-
origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
|
178
|
-
|
179
|
-
dt = griddata(origin_points, np.ravel(data[:, :]), target_points, method=method)
|
180
|
-
return np.reshape(dt, target_y.shape)
|
181
|
-
|
182
|
-
def interp_single3d(i, target_y, target_x, origin_y, origin_x, data, method="linear"):
|
183
|
-
target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
|
184
|
-
origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
|
185
|
-
|
186
|
-
dt = griddata(origin_points, np.ravel(data[i, :, :]), target_points, method=method)
|
187
|
-
return np.reshape(dt, target_y.shape)
|
188
|
-
|
189
|
-
def interp_single4d(i, j, target_y, target_x, origin_y, origin_x, data, method="linear"):
|
190
|
-
target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
|
191
|
-
origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
|
192
|
-
|
193
|
-
dt = griddata(origin_points, np.ravel(data[i, j, :, :]), target_points, method=method)
|
194
|
-
return np.reshape(dt, target_y.shape)
|
195
|
-
|
196
|
-
if len(target_y.shape) == 1:
|
197
|
-
target_x, target_y = np.meshgrid(target_x, target_y)
|
198
|
-
if len(origin_y.shape) == 1:
|
199
|
-
origin_x, origin_y = np.meshgrid(origin_x, origin_y)
|
200
|
-
|
201
|
-
dims = data.shape
|
202
|
-
len_dims = len(dims)
|
203
|
-
|
204
|
-
if origin_x.shape != dims[-2:] or origin_y.shape != dims[-2:]:
|
205
|
-
raise ValueError("数据形状与 origin_x 或 origin_y 的形状不匹配.")
|
206
|
-
|
207
|
-
interpolated_data = []
|
208
|
-
|
209
|
-
# 使用多线程进行插值
|
210
|
-
with ThreadPoolExecutor(max_workers=mp.cpu_count() - 2) as executor:
|
211
|
-
print(f"Using {mp.cpu_count() - 2} threads...")
|
212
|
-
if len_dims == 2:
|
213
|
-
interpolated_data = list(executor.map(interp_single2d, [target_y], [target_x], [origin_y], [origin_x], [data], [method]))
|
214
|
-
elif len_dims == 3:
|
215
|
-
interpolated_data = list(executor.map(interp_single3d, [i for i in range(dims[0])], [target_y] * dims[0], [target_x] * dims[0], [origin_y] * dims[0], [origin_x] * dims[0], [data] * dims[0], [method] * dims[0]))
|
216
|
-
elif len_dims == 4:
|
217
|
-
interpolated_data = list(
|
218
|
-
executor.map(
|
219
|
-
interp_single4d,
|
220
|
-
[i for i in range(dims[0]) for j in range(dims[1])],
|
221
|
-
[j for i in range(dims[0]) for j in range(dims[1])],
|
222
|
-
[target_y] * dims[0] * dims[1],
|
223
|
-
[target_x] * dims[0] * dims[1],
|
224
|
-
[origin_y] * dims[0] * dims[1],
|
225
|
-
[origin_x] * dims[0] * dims[1],
|
226
|
-
[data] * dims[0] * dims[1],
|
227
|
-
[method] * dims[0] * dims[1],
|
228
|
-
)
|
229
|
-
)
|
230
|
-
interpolated_data = np.array(interpolated_data).reshape(dims[0], dims[1], target_y.shape[0], target_x.shape[1])
|
231
|
-
|
232
|
-
interpolated_data = np.array(interpolated_data)
|
233
|
-
|
234
|
-
return interpolated_data
|
235
|
-
|
236
|
-
|
237
|
-
def _test_sum(a, b):
|
238
|
-
return a + b
|
239
|
-
|
240
|
-
|
241
116
|
if __name__ == "__main__":
|
242
117
|
|
243
118
|
pass
|
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -2,10 +2,10 @@
|
|
2
2
|
# coding=utf-8
|
3
3
|
"""
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
|
-
Date:
|
5
|
+
Date: 2025-01-29 19:05:09
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2025-01-
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\
|
7
|
+
LastEditTime: 2025-01-29 19:05:10
|
8
|
+
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly_20250129 copy.py
|
9
9
|
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
@@ -13,6 +13,9 @@ SystemInfo: Windows 11
|
|
13
13
|
Python Version: 3.12
|
14
14
|
"""
|
15
15
|
|
16
|
+
|
17
|
+
|
18
|
+
|
16
19
|
import datetime
|
17
20
|
import os
|
18
21
|
import random
|
@@ -24,19 +27,19 @@ from pathlib import Path
|
|
24
27
|
from threading import Lock
|
25
28
|
|
26
29
|
import matplotlib.pyplot as plt
|
30
|
+
import netCDF4 as nc
|
27
31
|
import numpy as np
|
28
32
|
import pandas as pd
|
29
|
-
import xarray as xr
|
30
33
|
import requests
|
34
|
+
import xarray as xr
|
31
35
|
from rich import print
|
32
36
|
from rich.progress import Progress
|
33
|
-
import netCDF4 as nc
|
34
37
|
|
38
|
+
from oafuncs.oa_down.idm import downloader as idm_downloader
|
35
39
|
from oafuncs.oa_down.user_agent import get_ua
|
36
40
|
from oafuncs.oa_file import file_size, mean_size
|
37
41
|
from oafuncs.oa_nc import check as check_nc
|
38
42
|
from oafuncs.oa_nc import modify as modify_nc
|
39
|
-
from oafuncs.oa_down.idm import downloader as idm_downloader
|
40
43
|
|
41
44
|
warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
|
42
45
|
|
@@ -575,26 +578,20 @@ def _check_existing_file(file_full_path, avg_size):
|
|
575
578
|
if abs(delta_size_ratio) > 0.025:
|
576
579
|
if check_nc(file_full_path):
|
577
580
|
# print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
|
578
|
-
|
579
|
-
return False
|
580
|
-
else:
|
581
|
-
return True
|
581
|
+
return True
|
582
582
|
else:
|
583
583
|
print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
|
584
584
|
return False
|
585
585
|
else:
|
586
|
-
|
587
|
-
return False
|
588
|
-
else:
|
589
|
-
return True
|
586
|
+
return True
|
590
587
|
else:
|
591
588
|
return False
|
592
589
|
|
593
590
|
|
594
591
|
def _get_mean_size30(store_path, same_file):
|
595
592
|
if same_file not in fsize_dict.keys():
|
596
|
-
|
597
|
-
|
593
|
+
# print(f'Same file name: {same_file}')
|
594
|
+
fsize_dict[same_file] = {"size": 0, "count": 0}
|
598
595
|
|
599
596
|
if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
|
600
597
|
# 更新30次文件最小值,后续认为可以代表所有文件,不再更新占用时间
|
@@ -609,7 +606,7 @@ def _get_mean_size30(store_path, same_file):
|
|
609
606
|
|
610
607
|
def _get_mean_size_move(same_file, current_file):
|
611
608
|
# 获取锁
|
612
|
-
with fsize_dict_lock:
|
609
|
+
with fsize_dict_lock: # 全局锁,确保同一时间只能有一个线程访问
|
613
610
|
# 初始化字典中的值,如果文件不在字典中
|
614
611
|
if same_file not in fsize_dict.keys():
|
615
612
|
fsize_dict[same_file] = {"size_list": [], "mean_size": 1.0}
|
@@ -698,7 +695,6 @@ def _correct_time(nc_file):
|
|
698
695
|
modify_nc(nc_file, "time", None, time_difference)
|
699
696
|
|
700
697
|
|
701
|
-
|
702
698
|
def _download_file(target_url, store_path, file_name, check=False):
|
703
699
|
# Check if the file exists
|
704
700
|
fname = Path(store_path) / file_name
|
@@ -707,13 +703,28 @@ def _download_file(target_url, store_path, file_name, check=False):
|
|
707
703
|
# same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
|
708
704
|
same_file = "_".join(file_name_split) + "*nc"
|
709
705
|
|
710
|
-
if
|
711
|
-
if
|
712
|
-
|
706
|
+
if match_time is not None:
|
707
|
+
if check_nc(fname):
|
708
|
+
if not _check_ftime(fname, if_print=True):
|
709
|
+
if match_time:
|
710
|
+
_correct_time(fname)
|
711
|
+
count_dict['skip'] += 1
|
712
|
+
else:
|
713
|
+
_clear_existing_file(fname)
|
714
|
+
# print(f"[bold #ffe5c0]File time error, {fname}")
|
715
|
+
count_dict["no_data"] += 1
|
716
|
+
else:
|
717
|
+
count_dict["skip"] += 1
|
718
|
+
print(f"[bold green]{file_name} is correct")
|
719
|
+
return
|
720
|
+
|
721
|
+
if check:
|
722
|
+
if same_file not in fsize_dict.keys(): # 对第一个文件单独进行检查,因为没有大小可以对比
|
723
|
+
check_nc(fname, delete_switch=True)
|
713
724
|
|
714
725
|
# set_min_size = _get_mean_size30(store_path, same_file) # 原方案,只30次取平均值;若遇变化,无法判断
|
715
726
|
get_mean_size = _get_mean_size_move(same_file, fname)
|
716
|
-
|
727
|
+
|
717
728
|
if _check_existing_file(fname, get_mean_size):
|
718
729
|
count_dict["skip"] += 1
|
719
730
|
return
|
@@ -767,7 +778,7 @@ def _download_file(target_url, store_path, file_name, check=False):
|
|
767
778
|
break
|
768
779
|
if request_times > 0:
|
769
780
|
# print(f'\r正在重试第 {request_times} 次', end="")
|
770
|
-
print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
|
781
|
+
print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
|
771
782
|
# 尝试下载文件
|
772
783
|
try:
|
773
784
|
headers = {"User-Agent": get_ua()}
|
@@ -788,9 +799,6 @@ def _download_file(target_url, store_path, file_name, check=False):
|
|
788
799
|
f.write(chunk)
|
789
800
|
|
790
801
|
f.close()
|
791
|
-
|
792
|
-
if not _check_ftime(fname):
|
793
|
-
_correct_time(fname)
|
794
802
|
|
795
803
|
# print(f'\r文件 {fname} 下载成功', end="")
|
796
804
|
if os.path.exists(fname):
|
@@ -923,11 +931,11 @@ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
|
|
923
931
|
var = current_group[0]
|
924
932
|
submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
|
925
933
|
file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
|
926
|
-
old_str = f
|
927
|
-
new_str = f
|
934
|
+
old_str = f"var={variable_info[var]['var_name']}"
|
935
|
+
new_str = f"var={variable_info[var]['var_name']}"
|
928
936
|
if len(current_group) > 1:
|
929
937
|
for v in current_group[1:]:
|
930
|
-
new_str = f
|
938
|
+
new_str = f"{new_str}&var={variable_info[v]['var_name']}"
|
931
939
|
submit_url = submit_url.replace(old_str, new_str)
|
932
940
|
# file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
|
933
941
|
file_name = f"HYCOM_{key}_{download_time}.nc"
|
@@ -1023,7 +1031,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
1023
1031
|
# 串行方式
|
1024
1032
|
for i, time_str in enumerate(time_list):
|
1025
1033
|
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
|
1026
|
-
progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
|
1034
|
+
progress.update(task, advance=1, description=f"[cyan]Downloading... {i + 1}/{len(time_list)}")
|
1027
1035
|
else:
|
1028
1036
|
# 并行方式
|
1029
1037
|
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
@@ -1041,7 +1049,7 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
1041
1049
|
time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
|
1042
1050
|
time_str_end = time_list[time_str_end_index]
|
1043
1051
|
_prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
|
1044
|
-
progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
|
1052
|
+
progress.update(task, advance=1, description=f"[cyan]Downloading... {i + 1}/{total_num}")
|
1045
1053
|
else:
|
1046
1054
|
# 并行方式
|
1047
1055
|
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
@@ -1051,10 +1059,10 @@ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_mi
|
|
1051
1059
|
for feature in as_completed(futures):
|
1052
1060
|
_done_callback(feature, progress, task, len(time_list), counter_lock)
|
1053
1061
|
else:
|
1054
|
-
print("Please ensure the time_s is no more than time_e")
|
1062
|
+
print("[bold red]Please ensure the time_s is no more than time_e")
|
1055
1063
|
|
1056
1064
|
|
1057
|
-
def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None):
|
1065
|
+
def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1, idm_engine=None, fill_time=None):
|
1058
1066
|
"""
|
1059
1067
|
Description:
|
1060
1068
|
Download the data of single time or a series of time
|
@@ -1076,6 +1084,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
|
|
1076
1084
|
check: bool, whether to check existing files, default is False; if True, an existing file is checked and not downloaded again; otherwise, the existing file is overwritten
|
1077
1085
|
ftimes: int, the number of time steps in one file, default is 1; if set to 1, each file holds the data of a single time step; the maximum is 8, in which case the data of 8 time steps are downloaded into one file
|
1078
1086
|
idm_engine: str, the IDM engine, default is None, if set, the IDM will be used to download the data; example: "D:\\Programs\\Internet Download Manager\\IDMan.exe"
|
1087
|
+
fill_time: bool or None, how to handle the time stored in the data, default is None. None: only download the data; True: change the time stored in the data to the time in the file name; False: compare the time in the file name with the time stored in the data and delete the file if they do not match
|
1079
1088
|
|
1080
1089
|
Returns:
|
1081
1090
|
None
|
@@ -1123,7 +1132,7 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
|
|
1123
1132
|
os.makedirs(str(store_path), exist_ok=True)
|
1124
1133
|
|
1125
1134
|
if num_workers is not None:
|
1126
|
-
num_workers = max(min(num_workers, 10), 1)
|
1135
|
+
num_workers = max(min(num_workers, 10), 1) # 暂时不限制最大值,再检查的时候可以多开一些线程
|
1127
1136
|
# num_workers = int(max(num_workers, 1))
|
1128
1137
|
time_s = str(time_s)
|
1129
1138
|
if len(time_s) == 8:
|
@@ -1143,42 +1152,48 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
|
|
1143
1152
|
|
1144
1153
|
global fsize_dict
|
1145
1154
|
fsize_dict = {}
|
1146
|
-
|
1155
|
+
|
1147
1156
|
global fsize_dict_lock
|
1148
1157
|
fsize_dict_lock = Lock()
|
1149
1158
|
|
1159
|
+
if fill_time is not None:
|
1160
|
+
num_workers = 1
|
1161
|
+
|
1150
1162
|
global use_idm, given_idm_engine, idm_download_list
|
1151
1163
|
if idm_engine is not None:
|
1152
1164
|
use_idm = True
|
1165
|
+
num_workers = 1
|
1153
1166
|
given_idm_engine = idm_engine
|
1154
1167
|
idm_download_list = []
|
1155
1168
|
else:
|
1156
1169
|
use_idm = False
|
1157
|
-
|
1170
|
+
|
1171
|
+
global match_time
|
1172
|
+
match_time = fill_time
|
1158
1173
|
|
1159
1174
|
_download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
|
1160
|
-
|
1161
|
-
if
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
count_dict["success"] += 1
|
1170
|
-
else:
|
1171
|
-
wait_success += 1
|
1172
|
-
time.sleep(3)
|
1173
|
-
if wait_success >= 20:
|
1175
|
+
|
1176
|
+
if idm_engine is not None:
|
1177
|
+
if idm_download_list:
|
1178
|
+
for f in idm_download_list:
|
1179
|
+
wait_success = 0
|
1180
|
+
success = False
|
1181
|
+
while not success:
|
1182
|
+
if check_nc(f):
|
1183
|
+
count_dict["success"] += 1
|
1174
1184
|
success = True
|
1175
|
-
|
1176
|
-
|
1185
|
+
else:
|
1186
|
+
wait_success += 1
|
1187
|
+
time.sleep(3)
|
1188
|
+
if wait_success >= 20:
|
1189
|
+
success = True
|
1190
|
+
# print(f'{f} download failed')
|
1191
|
+
count_dict["fail"] += 1
|
1177
1192
|
|
1178
1193
|
count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
|
1179
|
-
|
1180
1194
|
print("[bold #ecdbfe]-" * 160)
|
1181
|
-
print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
|
1195
|
+
print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}\nNo data: {count_dict['no_data']}")
|
1196
|
+
print("[bold #ecdbfe]-" * 160)
|
1182
1197
|
if count_dict["fail"] > 0:
|
1183
1198
|
print("[bold #be5528]Please try again to download the failed data later")
|
1184
1199
|
if count_dict["no_data"] > 0:
|
@@ -1241,10 +1256,6 @@ def how_to_use():
|
|
1241
1256
|
|
1242
1257
|
|
1243
1258
|
if __name__ == "__main__":
|
1244
|
-
time_s, time_e = "2018010800", "2024083121"
|
1245
|
-
merge_name = f"{time_s}_{time_e}" # 合并后的文件名
|
1246
|
-
root_path = r"G:\Data\HYCOM\3hourly"
|
1247
|
-
location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
|
1248
1259
|
download_dict = {
|
1249
1260
|
"water_u": {"simple_name": "u", "download": 1},
|
1250
1261
|
"water_v": {"simple_name": "v", "download": 1},
|
@@ -1259,51 +1270,31 @@ if __name__ == "__main__":
|
|
1259
1270
|
|
1260
1271
|
var_list = [var_name for var_name in download_dict.keys() if download_dict[var_name]["download"]]
|
1261
1272
|
|
1262
|
-
|
1263
|
-
# if you wanna download all depth or level, set both False
|
1264
|
-
depth = None # or 0-5000 meters
|
1265
|
-
level = None # or 1-40 levels
|
1266
|
-
num_workers = 1
|
1267
|
-
|
1268
|
-
check = True
|
1269
|
-
ftimes = 1
|
1270
|
-
idm_engine = r"D:\Programs\Internet Download Manager\IDMan.exe"
|
1271
|
-
|
1272
|
-
download_switch, single_var = True, False
|
1273
|
-
combine_switch = False
|
1274
|
-
copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
|
1273
|
+
single_var = False
|
1275
1274
|
|
1276
1275
|
# draw_time_range(pic_save_folder=r'I:\Delete')
|
1277
1276
|
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1277
|
+
options = {
|
1278
|
+
"var": var_list,
|
1279
|
+
"time_s": "2018010100",
|
1280
|
+
"time_e": "2020123121",
|
1281
|
+
"store_path": r"F:\Data\HYCOM\3hourly",
|
1282
|
+
"lon_min": 105,
|
1283
|
+
"lon_max": 130,
|
1284
|
+
"lat_min": 15,
|
1285
|
+
"lat_max": 45,
|
1286
|
+
"num_workers": 3,
|
1287
|
+
"check": True,
|
1288
|
+
"depth": None, # or 0-5000 meters
|
1289
|
+
"level": None, # or 1-40 levels
|
1290
|
+
"ftimes": 1,
|
1291
|
+
# "idm_engine": r"D:\Programs\Internet Download Manager\IDMan.exe", # 查漏补缺不建议开启
|
1292
|
+
"fill_time": False,
|
1293
|
+
}
|
1284
1294
|
|
1285
|
-
|
1286
|
-
time_list = get_time_list(time_s, time_e, 3, 'hour')
|
1295
|
+
if single_var:
|
1287
1296
|
for var_name in var_list:
|
1288
|
-
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
merge_path_name = Path(root_path)/f'HYCOM_{var_name}_{merge_name}.nc'
|
1293
|
-
else:
|
1294
|
-
# 如果混合,需要看情况获取文件列表
|
1295
|
-
fname = ''
|
1296
|
-
if var_name in ['water_u', 'water_v', 'water_u_bottom', 'water_v_bottom']:
|
1297
|
-
fname = 'uv3z'
|
1298
|
-
elif var_name in ['water_temp', 'salinity', 'water_temp_bottom', 'salinity_bottom']:
|
1299
|
-
fname = 'ts3z'
|
1300
|
-
elif var_name in ['surf_el']:
|
1301
|
-
fname = 'surf_el'
|
1302
|
-
for time_str in time_list:
|
1303
|
-
file_list.append(Path(root_path)/f'HYCOM_{fname}_{time_str}.nc')
|
1304
|
-
merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
|
1305
|
-
if combine_switch:
|
1306
|
-
# 这里的var_name必须是官方变量名,不能再是简写了
|
1307
|
-
merge(file_list, var_name, 'time', merge_path_name)
|
1308
|
-
if copy_switch:
|
1309
|
-
copy_file(merge_path_name, copy_dir) """
|
1297
|
+
options["var"] = var_name
|
1298
|
+
download(**options)
|
1299
|
+
else:
|
1300
|
+
download(**options)
|