oafuncs 0.0.76__py2.py3-none-any.whl → 0.0.78__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_cmap.py +73 -24
- oafuncs/oa_down/User_Agent-list.txt +59 -12
- oafuncs/oa_down/__init__.py +2 -3
- oafuncs/oa_down/hycom_3hourly.py +322 -281
- oafuncs/oa_down/test.py +24 -1
- oafuncs/oa_file.py +40 -4
- oafuncs/oa_nc.py +79 -73
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.78.dist-info}/METADATA +9 -6
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.78.dist-info}/RECORD +12 -13
- oafuncs/oa_down/refs_pdf.py +0 -338
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.78.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.78.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.78.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# coding=utf-8
|
3
|
-
|
3
|
+
"""
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-11-01 10:31:09
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-12-
|
7
|
+
LastEditTime: 2024-12-08 10:20:45
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
|
9
9
|
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
12
12
|
SystemInfo: Windows 11
|
13
13
|
Python Version: 3.12
|
14
|
-
|
14
|
+
"""
|
15
|
+
|
15
16
|
import datetime
|
16
17
|
import os
|
17
18
|
import random
|
@@ -29,22 +30,22 @@ from rich.progress import Progress
|
|
29
30
|
|
30
31
|
warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
|
31
32
|
|
32
|
-
__all__ = [
|
33
|
+
__all__ = ["draw_time_range", "download", "how_to_use", "get_time_list", "get_ua"]
|
33
34
|
|
34
35
|
# time resolution
|
35
|
-
data_info = {
|
36
|
+
data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
|
36
37
|
|
37
38
|
# hourly data
|
38
39
|
# dataset: GLBv0.08, GLBu0.08, GLBy0.08
|
39
|
-
data_info[
|
40
|
+
data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}}
|
40
41
|
|
41
42
|
# version
|
42
43
|
# version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
|
43
|
-
data_info[
|
44
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
|
44
45
|
# version of GLBu0.08: 93.0
|
45
|
-
data_info[
|
46
|
+
data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
|
46
47
|
# version of GLBy0.08: 93.0
|
47
|
-
data_info[
|
48
|
+
data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
|
48
49
|
|
49
50
|
# info details
|
50
51
|
# time range
|
@@ -52,29 +53,29 @@ data_info['hourly']['dataset']['GLBy0.08']['version'] = {'93.0': {}}
|
|
52
53
|
# 在网页上提交超过范围的时间,会返回该数据集实际时间范围,从而纠正下面的时间范围
|
53
54
|
# 目前只纠正了GLBv0.08 93.0的时间范围,具体到小时了
|
54
55
|
# 其他数据集的时刻暂时默认为00起,21止
|
55
|
-
data_info[
|
56
|
-
data_info[
|
57
|
-
data_info[
|
58
|
-
data_info[
|
59
|
-
data_info[
|
60
|
-
data_info[
|
61
|
-
data_info[
|
56
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "19940101", "time_end": "20151231"}
|
57
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "20140701", "time_end": "20160430"}
|
58
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "20160501", "time_end": "20170131"}
|
59
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "20170201", "time_end": "20170531"}
|
60
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "20170601", "time_end": "20170930"}
|
61
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "20171001", "time_end": "20171231"}
|
62
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
|
62
63
|
# GLBu0.08
|
63
|
-
data_info[
|
64
|
+
data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "20180919", "time_end": "20181208"}
|
64
65
|
# GLBy0.08
|
65
|
-
data_info[
|
66
|
+
data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "20181204", "time_end": "20300904"}
|
66
67
|
|
67
68
|
# variable
|
68
69
|
variable_info = {
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
70
|
+
"u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
|
71
|
+
"v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
|
72
|
+
"temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
|
73
|
+
"salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
|
74
|
+
"ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
|
75
|
+
"u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
|
76
|
+
"v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
|
77
|
+
"temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
|
78
|
+
"salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
|
78
79
|
}
|
79
80
|
|
80
81
|
# classification method
|
@@ -82,15 +83,15 @@ variable_info = {
|
|
82
83
|
# same_path: the data of different years is stored in the same file
|
83
84
|
# var_different: the data of different variables is stored in different files
|
84
85
|
# var_year_different: the data of different variables and years is stored in different files
|
85
|
-
data_info[
|
86
|
-
data_info[
|
87
|
-
data_info[
|
88
|
-
data_info[
|
89
|
-
data_info[
|
90
|
-
data_info[
|
91
|
-
data_info[
|
92
|
-
data_info[
|
93
|
-
data_info[
|
86
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
|
87
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
|
88
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
|
89
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
|
90
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
|
91
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
|
92
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
|
93
|
+
data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
|
94
|
+
data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
|
94
95
|
|
95
96
|
# download info
|
96
97
|
# base url
|
@@ -98,64 +99,61 @@ data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['classification']
|
|
98
99
|
url_53x = {}
|
99
100
|
for y_53x in range(1994, 2016):
|
100
101
|
# r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
|
101
|
-
url_53x[str(y_53x)] = rf
|
102
|
-
data_info[
|
102
|
+
url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
|
103
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
|
103
104
|
# GLBv0.08 56.3
|
104
|
-
data_info[
|
105
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
|
105
106
|
# GLBv0.08 57.2
|
106
|
-
data_info[
|
107
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
|
107
108
|
# GLBv0.08 92.8
|
108
109
|
url_928 = {
|
109
|
-
|
110
|
-
|
111
|
-
|
110
|
+
"uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
|
111
|
+
"ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
|
112
|
+
"ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
|
112
113
|
}
|
113
|
-
data_info[
|
114
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
|
114
115
|
# GLBv0.08 57.7
|
115
|
-
data_info[
|
116
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
|
116
117
|
# GLBv0.08 92.9
|
117
118
|
url_929 = {
|
118
|
-
|
119
|
-
|
120
|
-
|
119
|
+
"uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
|
120
|
+
"ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
|
121
|
+
"ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
|
121
122
|
}
|
122
|
-
data_info[
|
123
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
|
123
124
|
# GLBv0.08 93.0
|
124
125
|
url_930_v = {
|
125
|
-
|
126
|
-
|
127
|
-
|
126
|
+
"uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
|
127
|
+
"ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
|
128
|
+
"ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
|
128
129
|
}
|
129
|
-
data_info[
|
130
|
+
data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
|
130
131
|
# GLBu0.08 93.0
|
131
132
|
url_930_u = {
|
132
|
-
|
133
|
-
|
134
|
-
|
133
|
+
"uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
|
134
|
+
"ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
|
135
|
+
"ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
|
135
136
|
}
|
136
|
-
data_info[
|
137
|
+
data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
|
137
138
|
# GLBy0.08 93.0
|
138
139
|
uv3z_930_y = {}
|
139
140
|
ts3z_930_y = {}
|
140
141
|
ssh_930_y = {}
|
141
142
|
for y_930_y in range(2018, 2025):
|
142
|
-
uv3z_930_y[str(
|
143
|
-
|
144
|
-
|
145
|
-
y_930_y)] = rf'https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?'
|
146
|
-
ssh_930_y[str(
|
147
|
-
y_930_y)] = rf'https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?'
|
143
|
+
uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
|
144
|
+
ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
|
145
|
+
ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
|
148
146
|
url_930_y = {
|
149
|
-
|
150
|
-
|
151
|
-
|
147
|
+
"uv3z": uv3z_930_y,
|
148
|
+
"ts3z": ts3z_930_y,
|
149
|
+
"ssh": ssh_930_y,
|
152
150
|
}
|
153
|
-
data_info[
|
151
|
+
data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
|
154
152
|
|
155
153
|
var_group = {
|
156
|
-
|
157
|
-
|
158
|
-
|
154
|
+
"uv3z": ["u", "v", "u_b", "v_b"],
|
155
|
+
"ts3z": ["temp", "salt", "temp_b", "salt_b"],
|
156
|
+
"ssh": ["ssh"],
|
159
157
|
}
|
160
158
|
|
161
159
|
|
@@ -164,21 +162,23 @@ def draw_time_range(pic_save_folder=None):
|
|
164
162
|
os.makedirs(pic_save_folder, exist_ok=True)
|
165
163
|
# Converting the data into a format suitable for plotting
|
166
164
|
data = []
|
167
|
-
for dataset, versions in data_info[
|
168
|
-
for version, time_range in versions[
|
169
|
-
t_s = time_range[
|
170
|
-
t_e = time_range[
|
165
|
+
for dataset, versions in data_info["hourly"]["dataset"].items():
|
166
|
+
for version, time_range in versions["version"].items():
|
167
|
+
t_s = time_range["time_range"]["time_start"]
|
168
|
+
t_e = time_range["time_range"]["time_end"]
|
171
169
|
if len(t_s) == 8:
|
172
|
-
t_s = t_s +
|
170
|
+
t_s = t_s + "00"
|
173
171
|
if len(t_e) == 8:
|
174
|
-
t_e = t_e +
|
175
|
-
t_s, t_e = t_s +
|
176
|
-
data.append(
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
172
|
+
t_e = t_e + "21"
|
173
|
+
t_s, t_e = t_s + "0000", t_e + "0000"
|
174
|
+
data.append(
|
175
|
+
{
|
176
|
+
"dataset": dataset,
|
177
|
+
"version": version,
|
178
|
+
"start_date": pd.to_datetime(t_s),
|
179
|
+
"end_date": pd.to_datetime(t_e),
|
180
|
+
}
|
181
|
+
)
|
182
182
|
|
183
183
|
# Creating a DataFrame
|
184
184
|
df = pd.DataFrame(data)
|
@@ -187,7 +187,7 @@ def draw_time_range(pic_save_folder=None):
|
|
187
187
|
plt.figure(figsize=(12, 6))
|
188
188
|
|
189
189
|
# Combined labels for datasets and versions
|
190
|
-
combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df[
|
190
|
+
combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df["dataset"], df["version"])]
|
191
191
|
|
192
192
|
colors = plt.cm.viridis(np.linspace(0, 1, len(combined_labels)))
|
193
193
|
|
@@ -197,40 +197,40 @@ def draw_time_range(pic_save_folder=None):
|
|
197
197
|
# Plotting each time range
|
198
198
|
k = 1
|
199
199
|
for _, row in df.iterrows():
|
200
|
-
plt.plot([row[
|
200
|
+
plt.plot([row["start_date"], row["end_date"]], [k, k], color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
|
201
201
|
# plt.text(row['end_date'], k,
|
202
202
|
# f"{row['version']}", ha='right', color='black')
|
203
|
-
ymdh_s = row[
|
204
|
-
ymdh_e = row[
|
203
|
+
ymdh_s = row["start_date"].strftime("%Y-%m-%d %H")
|
204
|
+
ymdh_e = row["end_date"].strftime("%Y-%m-%d %H")
|
205
205
|
if k == 1 or k == len(combined_labels):
|
206
|
-
plt.text(row[
|
207
|
-
plt.text(row[
|
206
|
+
plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="left", color="black")
|
207
|
+
plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="right", color="black")
|
208
208
|
else:
|
209
|
-
plt.text(row[
|
210
|
-
plt.text(row[
|
209
|
+
plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="right", color="black")
|
210
|
+
plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="left", color="black")
|
211
211
|
k += 1
|
212
212
|
|
213
213
|
# Setting the y-axis labels
|
214
|
-
plt.yticks(range(1, len(combined_labels)+1), combined_labels)
|
215
|
-
plt.xlabel(
|
216
|
-
plt.ylabel(
|
217
|
-
plt.title(
|
214
|
+
plt.yticks(range(1, len(combined_labels) + 1), combined_labels)
|
215
|
+
plt.xlabel("Time")
|
216
|
+
plt.ylabel("Dataset - Version")
|
217
|
+
plt.title("Time Range of Different Versions of Datasets")
|
218
218
|
plt.xticks(rotation=45)
|
219
219
|
plt.grid(True)
|
220
220
|
plt.tight_layout()
|
221
221
|
if pic_save_folder:
|
222
|
-
plt.savefig(Path(pic_save_folder) /
|
223
|
-
print(f
|
222
|
+
plt.savefig(Path(pic_save_folder) / "HYCOM_time_range.png")
|
223
|
+
print(f"[bold green]HYCOM_time_range.png has been saved in {pic_save_folder}")
|
224
224
|
else:
|
225
|
-
plt.savefig(
|
226
|
-
print(
|
227
|
-
print(f
|
225
|
+
plt.savefig("HYCOM_time_range.png")
|
226
|
+
print("[bold green]HYCOM_time_range.png has been saved in the current folder")
|
227
|
+
print(f"Curren folder: {os.getcwd()}")
|
228
228
|
# plt.show()
|
229
229
|
plt.close()
|
230
230
|
|
231
231
|
|
232
|
-
def get_time_list(time_s, time_e, delta, interval_type=
|
233
|
-
|
232
|
+
def get_time_list(time_s, time_e, delta, interval_type="hour"):
|
233
|
+
"""
|
234
234
|
Description: get a list of time strings from time_s to time_e with a specified interval
|
235
235
|
Args:
|
236
236
|
time_s: start time string, e.g. '2023080203' for hours or '20230802' for days
|
@@ -239,14 +239,14 @@ def get_time_list(time_s, time_e, delta, interval_type='hour'):
|
|
239
239
|
interval_type: 'hour' for hour interval, 'day' for day interval
|
240
240
|
Returns:
|
241
241
|
dt_list: a list of time strings
|
242
|
-
|
242
|
+
"""
|
243
243
|
time_s, time_e = str(time_s), str(time_e)
|
244
|
-
if interval_type ==
|
245
|
-
time_format =
|
246
|
-
delta_type =
|
247
|
-
elif interval_type ==
|
248
|
-
time_format =
|
249
|
-
delta_type =
|
244
|
+
if interval_type == "hour":
|
245
|
+
time_format = "%Y%m%d%H"
|
246
|
+
delta_type = "hours"
|
247
|
+
elif interval_type == "day":
|
248
|
+
time_format = "%Y%m%d"
|
249
|
+
delta_type = "days"
|
250
250
|
# Ensure time strings are in the correct format for days
|
251
251
|
time_s = time_s[:8]
|
252
252
|
time_e = time_e[:8]
|
@@ -264,50 +264,50 @@ def get_time_list(time_s, time_e, delta, interval_type='hour'):
|
|
264
264
|
def transform_time(time_str):
|
265
265
|
# old_time = '2023080203'
|
266
266
|
# time_new = '2023-08-02T03%3A00%3A00Z'
|
267
|
-
time_new = f
|
267
|
+
time_new = f"{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z"
|
268
268
|
return time_new
|
269
269
|
|
270
270
|
|
271
|
-
def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode=
|
271
|
+
def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
|
272
272
|
query_dict = {
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
273
|
+
"var": variable_info[var]["var_name"],
|
274
|
+
"north": lat_max,
|
275
|
+
"west": lon_min,
|
276
|
+
"east": lon_max,
|
277
|
+
"south": lat_min,
|
278
|
+
"horizStride": 1,
|
279
|
+
"time": None,
|
280
|
+
"time_start": None,
|
281
|
+
"time_end": None,
|
282
|
+
"timeStride": None,
|
283
|
+
"vertCoord": None,
|
284
|
+
"vertStride": None,
|
285
|
+
"addLatLon": "true",
|
286
|
+
"accept": "netcdf4",
|
287
287
|
}
|
288
288
|
|
289
289
|
if time_str_end is not None:
|
290
|
-
query_dict[
|
291
|
-
query_dict[
|
292
|
-
query_dict[
|
290
|
+
query_dict["time_start"] = transform_time(time_str_ymdh)
|
291
|
+
query_dict["time_end"] = transform_time(time_str_end)
|
292
|
+
query_dict["timeStride"] = 1
|
293
293
|
else:
|
294
|
-
query_dict[
|
294
|
+
query_dict["time"] = transform_time(time_str_ymdh)
|
295
295
|
|
296
296
|
def get_nearest_level_index(depth):
|
297
297
|
level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
|
298
|
-
return min(range(len(level_depth)), key=lambda i: abs(level_depth[i]-depth))
|
298
|
+
return min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - depth))
|
299
299
|
|
300
|
-
if var not in [
|
301
|
-
if mode ==
|
300
|
+
if var not in ["ssh", "u_b", "v_b", "temp_b", "salt_b"] and var in ["u", "v", "temp", "salt"]:
|
301
|
+
if mode == "depth":
|
302
302
|
if depth < 0 or depth > 5000:
|
303
|
-
print(
|
304
|
-
query_dict[
|
305
|
-
elif mode ==
|
303
|
+
print("Please ensure the depth is in the range of 0-5000 m")
|
304
|
+
query_dict["vertCoord"] = get_nearest_level_index(depth) + 1
|
305
|
+
elif mode == "level":
|
306
306
|
if level_num < 1 or level_num > 40:
|
307
|
-
print(
|
308
|
-
query_dict[
|
309
|
-
elif mode ==
|
310
|
-
query_dict[
|
307
|
+
print("Please ensure the level_num is in the range of 1-40")
|
308
|
+
query_dict["vertCoord"] = max(1, min(level_num, 40))
|
309
|
+
elif mode == "full":
|
310
|
+
query_dict["vertStride"] = 1
|
311
311
|
else:
|
312
312
|
raise ValueError("Invalid mode. Choose from 'depth', 'level', or 'full'")
|
313
313
|
|
@@ -328,13 +328,13 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
|
|
328
328
|
else:
|
329
329
|
time_start = int(time_input)
|
330
330
|
time_end = int(time_end)
|
331
|
-
time_input_str = f
|
331
|
+
time_input_str = f"{time_input}-{time_end}"
|
332
332
|
|
333
333
|
# 根据时间长度补全时间格式
|
334
334
|
if len(str(time_start)) == 8:
|
335
|
-
time_start = str(time_start) +
|
335
|
+
time_start = str(time_start) + "00"
|
336
336
|
if len(str(time_end)) == 8:
|
337
|
-
time_end = str(time_end) +
|
337
|
+
time_end = str(time_end) + "21"
|
338
338
|
time_start, time_end = int(time_start), int(time_end)
|
339
339
|
|
340
340
|
d_list = []
|
@@ -343,75 +343,75 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
|
|
343
343
|
have_data = False
|
344
344
|
|
345
345
|
# 遍历数据集和版本
|
346
|
-
for dataset_name in data_info[
|
347
|
-
for version_name in data_info[
|
348
|
-
time_s, time_e = list(data_info[
|
346
|
+
for dataset_name in data_info["hourly"]["dataset"].keys():
|
347
|
+
for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
|
348
|
+
time_s, time_e = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
|
349
349
|
time_s, time_e = str(time_s), str(time_e)
|
350
350
|
if len(time_s) == 8:
|
351
|
-
time_s = time_s +
|
351
|
+
time_s = time_s + "00"
|
352
352
|
if len(time_e) == 8:
|
353
|
-
time_e = time_e +
|
353
|
+
time_e = time_e + "21"
|
354
354
|
# 检查时间是否在数据集的时间范围内
|
355
355
|
if is_single_time:
|
356
356
|
if time_start >= int(time_s) and time_start <= int(time_e):
|
357
357
|
d_list.append(dataset_name)
|
358
358
|
v_list.append(version_name)
|
359
|
-
trange_list.append(f
|
359
|
+
trange_list.append(f"{time_s}-{time_e}")
|
360
360
|
have_data = True
|
361
361
|
else:
|
362
362
|
if time_start >= int(time_s) and time_end <= int(time_e):
|
363
363
|
d_list.append(dataset_name)
|
364
364
|
v_list.append(version_name)
|
365
|
-
trange_list.append(f
|
365
|
+
trange_list.append(f"{time_s}-{time_e}")
|
366
366
|
have_data = True
|
367
367
|
|
368
368
|
# 输出结果
|
369
|
-
print(f
|
369
|
+
print(f"[bold red]{time_input_str} is in the following dataset and version:")
|
370
370
|
if have_data:
|
371
371
|
for d, v, trange in zip(d_list, v_list, trange_list):
|
372
|
-
print(f
|
372
|
+
print(f"[bold blue]{d} {v} {trange}")
|
373
373
|
return True
|
374
374
|
else:
|
375
|
-
print(f
|
375
|
+
print(f"[bold red]{time_input_str} is not in any dataset and version")
|
376
376
|
return False
|
377
377
|
|
378
378
|
|
379
379
|
def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
|
380
380
|
# 根据时间长度补全时间格式
|
381
381
|
if len(str(time_input)) == 8:
|
382
|
-
time_input = str(time_input) +
|
382
|
+
time_input = str(time_input) + "00"
|
383
383
|
time_start = int(time_input)
|
384
384
|
if time_end is not None:
|
385
385
|
if len(str(time_end)) == 8:
|
386
|
-
time_end = str(time_end) +
|
386
|
+
time_end = str(time_end) + "21"
|
387
387
|
time_end = int(time_end)
|
388
388
|
else:
|
389
389
|
time_end = time_start
|
390
390
|
|
391
391
|
# 检查指定的数据集和版本是否存在
|
392
|
-
if dataset_name not in data_info[
|
393
|
-
print(f
|
392
|
+
if dataset_name not in data_info["hourly"]["dataset"]:
|
393
|
+
print(f"[bold red]Dataset {dataset_name} not found.")
|
394
394
|
return False
|
395
|
-
if version_name not in data_info[
|
396
|
-
print(f
|
395
|
+
if version_name not in data_info["hourly"]["dataset"][dataset_name]["version"]:
|
396
|
+
print(f"[bold red]Version {version_name} not found in dataset {dataset_name}.")
|
397
397
|
return False
|
398
398
|
|
399
399
|
# 获取指定数据集和版本的时间范围
|
400
|
-
time_range = data_info[
|
400
|
+
time_range = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"]
|
401
401
|
time_s, time_e = list(time_range.values())
|
402
402
|
time_s, time_e = str(time_s), str(time_e)
|
403
403
|
if len(time_s) == 8:
|
404
|
-
time_s = time_s +
|
404
|
+
time_s = time_s + "00"
|
405
405
|
if len(time_e) == 8:
|
406
|
-
time_e = time_e +
|
406
|
+
time_e = time_e + "21"
|
407
407
|
time_s, time_e = int(time_s), int(time_e)
|
408
408
|
|
409
409
|
# 检查时间是否在指定数据集和版本的时间范围内
|
410
410
|
if time_start >= time_s and time_end <= time_e:
|
411
|
-
print(f
|
411
|
+
print(f"[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.")
|
412
412
|
return True
|
413
413
|
else:
|
414
|
-
print(f
|
414
|
+
print(f"[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.")
|
415
415
|
return False
|
416
416
|
|
417
417
|
|
@@ -420,7 +420,7 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
|
|
420
420
|
# 示例结构:data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
|
421
421
|
|
422
422
|
if len(str(time_input)) == 8:
|
423
|
-
time_input = str(time_input) +
|
423
|
+
time_input = str(time_input) + "00"
|
424
424
|
|
425
425
|
# 如果 time_end 是 None,则将 time_input 的值赋给它
|
426
426
|
if time_end is None:
|
@@ -431,14 +431,14 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
|
|
431
431
|
|
432
432
|
dataset_name_out, version_name_out = None, None
|
433
433
|
|
434
|
-
for dataset_name in data_info[
|
435
|
-
for version_name in data_info[
|
436
|
-
[time_s, time_e] = list(data_info[
|
434
|
+
for dataset_name in data_info["hourly"]["dataset"].keys():
|
435
|
+
for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
|
436
|
+
[time_s, time_e] = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
|
437
437
|
time_s, time_e = str(time_s), str(time_e)
|
438
438
|
if len(time_s) == 8:
|
439
|
-
time_s = time_s +
|
439
|
+
time_s = time_s + "00"
|
440
440
|
if len(time_e) == 8:
|
441
|
-
time_e = time_e +
|
441
|
+
time_e = time_e + "21"
|
442
442
|
time_s, time_e = int(time_s), int(time_e)
|
443
443
|
|
444
444
|
# 检查时间是否在数据集版本的时间范围内
|
@@ -448,55 +448,55 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
|
|
448
448
|
dataset_name_out, version_name_out = dataset_name, version_name
|
449
449
|
|
450
450
|
if dataset_name_out is not None and version_name_out is not None:
|
451
|
-
print(f
|
451
|
+
print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
|
452
452
|
|
453
453
|
# 如果没有找到匹配的数据集和版本,会返回 None
|
454
454
|
return dataset_name_out, version_name_out
|
455
455
|
|
456
456
|
|
457
457
|
def get_base_url(dataset_name, version_name, var, year_str):
|
458
|
-
url_dict = data_info[
|
459
|
-
classification_method = data_info[
|
460
|
-
if classification_method ==
|
458
|
+
url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
|
459
|
+
classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
|
460
|
+
if classification_method == "year_different":
|
461
461
|
base_url = url_dict[str(year_str)]
|
462
|
-
elif classification_method ==
|
462
|
+
elif classification_method == "same_path":
|
463
463
|
base_url = url_dict
|
464
|
-
elif classification_method ==
|
464
|
+
elif classification_method == "var_different":
|
465
465
|
base_url = None
|
466
466
|
for key, value in var_group.items():
|
467
467
|
if var in value:
|
468
468
|
base_url = url_dict[key]
|
469
469
|
break
|
470
470
|
if base_url is None:
|
471
|
-
print(
|
472
|
-
elif classification_method ==
|
471
|
+
print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
|
472
|
+
elif classification_method == "var_year_different":
|
473
473
|
base_url = None
|
474
474
|
for key, value in var_group.items():
|
475
475
|
if var in value:
|
476
476
|
base_url = url_dict[key][str(year_str)]
|
477
477
|
break
|
478
478
|
if base_url is None:
|
479
|
-
print(
|
479
|
+
print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
|
480
480
|
return base_url
|
481
481
|
|
482
482
|
|
483
483
|
def get_submit_url(dataset_name, version_name, var, year_str, query_dict):
|
484
484
|
base_url = get_base_url(dataset_name, version_name, var, year_str)
|
485
|
-
if isinstance(query_dict[
|
486
|
-
query_dict[
|
487
|
-
target_url = base_url +
|
485
|
+
if isinstance(query_dict["var"], str):
|
486
|
+
query_dict["var"] = [query_dict["var"]]
|
487
|
+
target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
|
488
488
|
return target_url
|
489
489
|
|
490
490
|
|
491
491
|
def clear_existing_file(file_full_path):
|
492
492
|
if os.path.exists(file_full_path):
|
493
493
|
os.remove(file_full_path)
|
494
|
-
print(f
|
494
|
+
print(f"{file_full_path} has been removed")
|
495
495
|
|
496
496
|
|
497
497
|
def check_existing_file(file_full_path):
|
498
498
|
if os.path.exists(file_full_path):
|
499
|
-
print(f
|
499
|
+
print(f"[bold #FFA54F]{file_full_path} exists")
|
500
500
|
return True
|
501
501
|
else:
|
502
502
|
# print(f'{file_full_path} does not exist')
|
@@ -505,9 +505,9 @@ def check_existing_file(file_full_path):
|
|
505
505
|
|
506
506
|
def get_ua():
|
507
507
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
508
|
-
ua_file_txt = os.path.join(current_dir,
|
508
|
+
ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
|
509
509
|
|
510
|
-
with open(ua_file_txt,
|
510
|
+
with open(ua_file_txt, "r") as f:
|
511
511
|
ua_list = f.readlines()
|
512
512
|
# 去掉换行符和空行
|
513
513
|
ua_list = [line.strip() for line in ua_list if line.strip()]
|
@@ -515,28 +515,24 @@ def get_ua():
|
|
515
515
|
return random.choice(ua_list)
|
516
516
|
|
517
517
|
|
518
|
-
|
519
518
|
def get_proxy():
|
520
519
|
# 获取当前脚本的绝对路径
|
521
520
|
script_dir = os.path.dirname(os.path.abspath(__file__))
|
522
521
|
# 构建ip.txt的绝对路径
|
523
|
-
ip_file_txt = os.path.join(script_dir,
|
524
|
-
with open(ip_file_txt,
|
522
|
+
ip_file_txt = os.path.join(script_dir, "ip.txt")
|
523
|
+
with open(ip_file_txt, "r") as f:
|
525
524
|
ips = f.readlines()
|
526
525
|
ip_list = []
|
527
526
|
for ip in ips:
|
528
527
|
ip_list.append(ip.strip())
|
529
528
|
choose_ip = random.choice(ip_list)
|
530
|
-
proxies = {
|
531
|
-
'http': 'http://' + choose_ip,
|
532
|
-
'https': 'https://' + choose_ip
|
533
|
-
}
|
529
|
+
proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
|
534
530
|
# print(f'Using proxy: {proxies}')
|
535
531
|
return proxies
|
536
532
|
|
537
533
|
|
538
534
|
def dlownload_file(target_url, store_path, file_name, check=False):
|
539
|
-
print(f
|
535
|
+
print(f"[bold #f0f6d0]Requesting {file_name}...")
|
540
536
|
# 创建会话
|
541
537
|
s = requests.Session()
|
542
538
|
download_success = False
|
@@ -547,7 +543,7 @@ def dlownload_file(target_url, store_path, file_name, check=False):
|
|
547
543
|
import re
|
548
544
|
|
549
545
|
# 定义正则表达式,匹配YYYYMMDDHH格式的时间
|
550
|
-
time_pattern = r
|
546
|
+
time_pattern = r"\d{10}"
|
551
547
|
|
552
548
|
# 定义两个字符串
|
553
549
|
# str1 = 'HYCOM_water_u_2018010100_2018010112.nc'
|
@@ -560,46 +556,62 @@ def dlownload_file(target_url, store_path, file_name, check=False):
|
|
560
556
|
num_times_str = len(times_in_str)
|
561
557
|
|
562
558
|
if num_times_str > 1:
|
563
|
-
delta_t = datetime.datetime.strptime(times_in_str[1],
|
559
|
+
delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
|
564
560
|
delta_t = delta_t.total_seconds() / 3600
|
565
561
|
delta_t = delta_t / 3 + 1
|
566
562
|
else:
|
567
563
|
delta_t = 1
|
568
564
|
|
569
|
-
return int(delta_t*
|
565
|
+
return int(delta_t * 180)
|
570
566
|
|
571
567
|
max_timeout = calculate_wait_time(file_name)
|
572
568
|
|
573
569
|
if check:
|
574
570
|
if check_existing_file(filename):
|
571
|
+
count_dict['skip'] += 1
|
575
572
|
return
|
576
573
|
clear_existing_file(filename)
|
577
574
|
# print(f'Download_start_time: {datetime.datetime.now()}')
|
578
575
|
download_time_s = datetime.datetime.now()
|
579
|
-
order_list = [
|
576
|
+
order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
|
580
577
|
while not download_success:
|
581
578
|
if request_times >= 10:
|
582
579
|
# print(f'下载失败,已重试 {request_times} 次\n可先跳过,后续再试')
|
583
|
-
print(f
|
580
|
+
print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
|
581
|
+
count_dict["fail"] += 1
|
584
582
|
break
|
585
583
|
if request_times > 0:
|
586
584
|
# print(f'\r正在重试第 {request_times} 次', end="")
|
587
|
-
print(f
|
585
|
+
print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
|
588
586
|
# 尝试下载文件
|
589
587
|
try:
|
590
|
-
headers = {
|
591
|
-
response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
|
588
|
+
headers = {"User-Agent": get_ua()}
|
589
|
+
""" response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
|
592
590
|
response.raise_for_status() # 如果请求返回的不是200,将抛出HTTPError异常
|
593
591
|
|
594
592
|
# 保存文件
|
595
593
|
with open(filename, 'wb') as f:
|
596
|
-
f.write(response.content)
|
594
|
+
f.write(response.content) """
|
595
|
+
|
596
|
+
response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout)) # 启用流式传输
|
597
|
+
response.raise_for_status() # 如果请求返回的不是200,将抛出HTTPError异常
|
598
|
+
|
599
|
+
# 保存文件
|
600
|
+
with open(filename, "wb") as f:
|
601
|
+
print(f"[bold #96cbd7]Downloading {file_name}...")
|
602
|
+
for chunk in response.iter_content(chunk_size=1024):
|
603
|
+
if chunk:
|
604
|
+
f.write(chunk)
|
605
|
+
|
606
|
+
f.close()
|
607
|
+
|
597
608
|
# print(f'\r文件 {filename} 下载成功', end="")
|
598
609
|
if os.path.exists(filename):
|
599
610
|
download_success = True
|
600
611
|
download_time_e = datetime.datetime.now()
|
601
612
|
download_delta = download_time_e - download_time_s
|
602
|
-
print(f
|
613
|
+
print(f"[#3dfc40]File [bold #dfff73]{filename} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
|
614
|
+
count_dict["success"] += 1
|
603
615
|
# print(f'Download_end_time: {datetime.datetime.now()}')
|
604
616
|
|
605
617
|
except requests.exceptions.HTTPError as errh:
|
@@ -636,17 +648,17 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
|
|
636
648
|
download_time_str = str(download_time)
|
637
649
|
|
638
650
|
if len(download_time_str) == 8:
|
639
|
-
download_time_str = download_time_str +
|
651
|
+
download_time_str = download_time_str + "00"
|
640
652
|
|
641
653
|
# 检查小时是否有效(如果需要的话)
|
642
654
|
if download_time_end is None and not check_hour_is_valid(download_time_str):
|
643
|
-
print(
|
644
|
-
raise ValueError(
|
655
|
+
print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
|
656
|
+
raise ValueError("The hour is invalid")
|
645
657
|
|
646
658
|
# 根据是否检查整个天来设置时间范围
|
647
659
|
if download_time_end is not None:
|
648
660
|
if len(str(download_time_end)) == 8:
|
649
|
-
download_time_end = str(download_time_end) +
|
661
|
+
download_time_end = str(download_time_end) + "21"
|
650
662
|
have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
|
651
663
|
if have_data:
|
652
664
|
return direct_choose_dataset_and_version(download_time_str, download_time_end)
|
@@ -661,26 +673,25 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
|
|
661
673
|
def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
|
662
674
|
year_str = str(download_time)[:4]
|
663
675
|
if depth is not None and level_num is not None:
|
664
|
-
print(
|
665
|
-
print(
|
666
|
-
which_mode =
|
676
|
+
print("Please ensure the depth or level_num is None")
|
677
|
+
print("Progress will use the depth")
|
678
|
+
which_mode = "depth"
|
667
679
|
elif depth is not None and level_num is None:
|
668
|
-
print(f
|
669
|
-
which_mode =
|
680
|
+
print(f"Data of single depth (~{depth} m) will be downloaded...")
|
681
|
+
which_mode = "depth"
|
670
682
|
elif level_num is not None and depth is None:
|
671
|
-
print(f
|
672
|
-
which_mode =
|
683
|
+
print(f"Data of single level ({level_num}) will be downloaded...")
|
684
|
+
which_mode = "level"
|
673
685
|
else:
|
674
|
-
print(
|
675
|
-
which_mode =
|
686
|
+
print("Full depth or full level data will be downloaded...")
|
687
|
+
which_mode = "full"
|
676
688
|
query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
|
677
|
-
submit_url = get_submit_url(
|
678
|
-
dataset_name, version_name, var, year_str, query_dict)
|
689
|
+
submit_url = get_submit_url(dataset_name, version_name, var, year_str, query_dict)
|
679
690
|
return submit_url
|
680
691
|
|
681
692
|
|
682
|
-
def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time=
|
683
|
-
print(
|
693
|
+
def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
|
694
|
+
print("[bold #ecdbfe]-" * 160)
|
684
695
|
download_time = str(download_time)
|
685
696
|
if download_time_end is not None:
|
686
697
|
download_time_end = str(download_time_end)
|
@@ -688,6 +699,11 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
|
|
688
699
|
else:
|
689
700
|
dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
|
690
701
|
if dataset_name is None and version_name is None:
|
702
|
+
count_dict["no_data"] += 1
|
703
|
+
if download_time_end is not None:
|
704
|
+
count_dict["no_data_list"].append(f"{download_time}-{download_time_end}")
|
705
|
+
else:
|
706
|
+
count_dict["no_data_list"].append(download_time)
|
691
707
|
return
|
692
708
|
|
693
709
|
if isinstance(var, str):
|
@@ -721,24 +737,24 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
|
|
721
737
|
new_str = f'{new_str}&var={variable_info[v]["var_name"]}'
|
722
738
|
submit_url = submit_url.replace(old_str, new_str)
|
723
739
|
# file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
|
724
|
-
file_name = f
|
740
|
+
file_name = f"HYCOM_{key}_{download_time}.nc"
|
725
741
|
if download_time_end is not None:
|
726
|
-
file_name = f
|
742
|
+
file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
|
727
743
|
dlownload_file(submit_url, store_path, file_name, check)
|
728
744
|
|
729
745
|
|
730
746
|
def convert_full_name_to_short_name(full_name):
|
731
747
|
for var, info in variable_info.items():
|
732
|
-
if full_name == info[
|
748
|
+
if full_name == info["var_name"] or full_name == info["standard_name"] or full_name == var:
|
733
749
|
return var
|
734
|
-
print(
|
735
|
-
print(
|
736
|
-
print(
|
750
|
+
print("[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b")
|
751
|
+
print("or")
|
752
|
+
print("[bold blue]water_u, water_v, water_temp, salinity, surf_el, water_u_bottom, water_v_bottom, water_temp_bottom, salinity_bottom")
|
737
753
|
return False
|
738
754
|
|
739
755
|
|
740
756
|
def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
|
741
|
-
|
757
|
+
"""
|
742
758
|
# 并行下载任务
|
743
759
|
# 这个函数是为了并行下载而设置的,是必须的,直接调用direct_download并行下载会出问题
|
744
760
|
|
@@ -746,13 +762,13 @@ def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_ma
|
|
746
762
|
本情况下,download_task函数的作用是将每个下载任务封装起来,包括它所需的所有参数。
|
747
763
|
这样,每个任务都是独立的,有自己的参数和数据,不会与其他任务共享或修改任何数据。
|
748
764
|
因此,即使多个任务同时执行,也不会出现数据交互错乱的问题。
|
749
|
-
|
765
|
+
"""
|
750
766
|
|
751
767
|
prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
|
752
768
|
|
753
769
|
|
754
|
-
def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90,
|
755
|
-
|
770
|
+
def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
|
771
|
+
"""
|
756
772
|
Description:
|
757
773
|
Download the data of single time or a series of time
|
758
774
|
|
@@ -773,13 +789,13 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
|
|
773
789
|
|
774
790
|
Returns:
|
775
791
|
None
|
776
|
-
|
792
|
+
"""
|
777
793
|
ymdh_time_s, ymdh_time_e = str(time_s), str(time_e)
|
778
794
|
if ymdh_time_s == ymdh_time_e:
|
779
795
|
prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name)
|
780
796
|
elif int(ymdh_time_s) < int(ymdh_time_e):
|
781
|
-
print(
|
782
|
-
time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3,
|
797
|
+
print("Downloading a series of files...")
|
798
|
+
time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, "hour")
|
783
799
|
with Progress() as progress:
|
784
800
|
task = progress.add_task("[cyan]Downloading...", total=len(time_list))
|
785
801
|
if ftimes == 1:
|
@@ -787,35 +803,35 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
|
|
787
803
|
# 串行方式
|
788
804
|
for i, time_str in enumerate(time_list):
|
789
805
|
prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
|
790
|
-
progress.update(task, advance=1, description=f
|
806
|
+
progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
|
791
807
|
else:
|
792
808
|
# 并行方式
|
793
809
|
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
794
810
|
futures = [executor.submit(download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
|
795
811
|
for i, future in enumerate(futures):
|
796
|
-
future.add_done_callback(lambda _: progress.update(task, advance=1, description=f
|
812
|
+
future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}"))
|
797
813
|
else:
|
798
|
-
new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3*ftimes,
|
814
|
+
new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
|
799
815
|
total_num = len(new_time_list)
|
800
816
|
if num_workers is None or num_workers <= 1:
|
801
817
|
# 串行方式
|
802
818
|
for i, time_str in enumerate(new_time_list):
|
803
|
-
time_str_end_index = int(min(len(time_list)-1, int(i*ftimes+ftimes-1)))
|
819
|
+
time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
|
804
820
|
time_str_end = time_list[time_str_end_index]
|
805
821
|
prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
|
806
|
-
progress.update(task, advance=1, description=f
|
822
|
+
progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
|
807
823
|
else:
|
808
824
|
# 并行方式
|
809
825
|
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
810
|
-
futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list)-1, int(i*ftimes+ftimes-1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
|
826
|
+
futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
|
811
827
|
for i, future in enumerate(futures):
|
812
|
-
future.add_done_callback(lambda _: progress.update(task, advance=1, description=f
|
828
|
+
future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}"))
|
813
829
|
else:
|
814
|
-
print(
|
830
|
+
print("Please ensure the time_s is no more than time_e")
|
815
831
|
|
816
832
|
|
817
|
-
def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90,
|
818
|
-
|
833
|
+
def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
|
834
|
+
"""
|
819
835
|
Description:
|
820
836
|
Download the data of single time or a series of time
|
821
837
|
|
@@ -838,20 +854,20 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
|
|
838
854
|
|
839
855
|
Returns:
|
840
856
|
None
|
841
|
-
|
857
|
+
"""
|
842
858
|
# 打印信息并处理数据集和版本名称
|
843
859
|
if dataset_name is None and version_name is None:
|
844
|
-
print(
|
845
|
-
print(
|
860
|
+
print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
|
861
|
+
print("If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.")
|
846
862
|
elif dataset_name is None and version_name is not None:
|
847
|
-
print(
|
848
|
-
print(
|
863
|
+
print("Please ensure the dataset_name is not None")
|
864
|
+
print("If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.")
|
849
865
|
elif dataset_name is not None and version_name is None:
|
850
|
-
print(
|
851
|
-
print(
|
866
|
+
print("Please ensure the version_name is not None")
|
867
|
+
print("If you do not add the version_name, both the dataset and version will be chosen according to the download_time.")
|
852
868
|
else:
|
853
|
-
print(
|
854
|
-
print(
|
869
|
+
print("The dataset_name and version_name are both set by yourself.")
|
870
|
+
print("Please ensure the dataset_name and version_name are correct.")
|
855
871
|
|
856
872
|
if isinstance(var, list):
|
857
873
|
if len(var) == 1:
|
@@ -861,17 +877,17 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
|
|
861
877
|
elif isinstance(var, str):
|
862
878
|
var = convert_full_name_to_short_name(var)
|
863
879
|
else:
|
864
|
-
raise ValueError(
|
880
|
+
raise ValueError("The var is invalid")
|
865
881
|
if var is False:
|
866
|
-
raise ValueError(
|
882
|
+
raise ValueError("The var is invalid")
|
867
883
|
if lon_min < 0 or lon_min > 359.92 or lon_max < 0 or lon_max > 359.92 or lat_min < -80 or lat_min > 90 or lat_max < -80 or lat_max > 90:
|
868
|
-
print(
|
869
|
-
print(
|
870
|
-
print(
|
871
|
-
raise ValueError(
|
884
|
+
print("Please ensure the lon_min, lon_max, lat_min, lat_max are in the range")
|
885
|
+
print("The range of lon_min, lon_max is 0~359.92")
|
886
|
+
print("The range of lat_min, lat_max is -80~90")
|
887
|
+
raise ValueError("The lon or lat is invalid")
|
872
888
|
|
873
889
|
if ftimes != 1:
|
874
|
-
print(
|
890
|
+
print("Please ensure the ftimes is in [1, 8]")
|
875
891
|
ftimes = max(min(ftimes, 8), 1)
|
876
892
|
|
877
893
|
if store_path is None:
|
@@ -884,19 +900,44 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
|
|
884
900
|
|
885
901
|
time_s = str(time_s)
|
886
902
|
if len(time_s) == 8:
|
887
|
-
time_s +=
|
903
|
+
time_s += "00"
|
888
904
|
if time_e is None:
|
889
905
|
time_e = time_s[:]
|
890
906
|
else:
|
891
907
|
time_e = str(time_e)
|
892
908
|
if len(time_e) == 8:
|
893
|
-
time_e +=
|
909
|
+
time_e += "21"
|
910
|
+
|
911
|
+
global count_dict
|
912
|
+
count_dict = {
|
913
|
+
'success': 0,
|
914
|
+
'fail': 0,
|
915
|
+
'skip': 0,
|
916
|
+
'no_data': 0,
|
917
|
+
'total': 0,
|
918
|
+
'no_data_list': []
|
919
|
+
}
|
894
920
|
|
895
921
|
download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
|
922
|
+
|
923
|
+
count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
|
924
|
+
|
925
|
+
print("[bold #ecdbfe]-" * 160)
|
926
|
+
print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
|
927
|
+
if count_dict['fail'] > 0:
|
928
|
+
print("[bold #be5528]Please try again to download the failed data later")
|
929
|
+
if count_dict['no_data'] > 0:
|
930
|
+
if count_dict['no_data'] == 1:
|
931
|
+
print(f"[bold #f90000]There is {count_dict['no_data']} data that does not exist in any dataset and version")
|
932
|
+
else:
|
933
|
+
print(f"[bold #f90000]These are {count_dict['no_data']} data that do not exist in any dataset and version")
|
934
|
+
for no_data in count_dict['no_data_list']:
|
935
|
+
print(f"[bold #d81b60]{no_data}")
|
936
|
+
print("[bold #ecdbfe]-" * 160)
|
896
937
|
|
897
938
|
|
898
939
|
def how_to_use():
|
899
|
-
print(
|
940
|
+
print("""
|
900
941
|
# 1. Choose the dataset and version according to the time:
|
901
942
|
# 1.1 Use function to query
|
902
943
|
You can use the function check_time_in_dataset_and_version(time_input=20241101) to find the dataset and version according to the time.
|
@@ -941,30 +982,30 @@ def how_to_use():
|
|
941
982
|
# 7.2 You can download the data of single time or a series of time
|
942
983
|
# 7.3 The parameters you must set are var, ymdh_time_s, ymdh_time_e
|
943
984
|
# 7.4 Example: download('u', '2024110112', '2024110212', lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None)
|
944
|
-
|
985
|
+
""")
|
945
986
|
|
946
987
|
|
947
|
-
if __name__ ==
|
988
|
+
if __name__ == "__main__":
|
948
989
|
# help(hycom3h.download)
|
949
|
-
time_s, time_e =
|
950
|
-
merge_name =
|
951
|
-
root_path = r
|
952
|
-
location_dict = {
|
990
|
+
time_s, time_e = "2018070100", "2019123121"
|
991
|
+
merge_name = "2018_2024"
|
992
|
+
root_path = r"G:\Data\HYCOM\3hourly"
|
993
|
+
location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
|
953
994
|
download_dict = {
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
995
|
+
"water_u": {"simple_name": "u", "download": 1},
|
996
|
+
"water_v": {"simple_name": "v", "download": 1},
|
997
|
+
"surf_el": {"simple_name": "ssh", "download": 1},
|
998
|
+
"water_temp": {"simple_name": "temp", "download": 1},
|
999
|
+
"salinity": {"simple_name": "salt", "download": 1},
|
1000
|
+
"water_u_bottom": {"simple_name": "u_b", "download": 0},
|
1001
|
+
"water_v_bottom": {"simple_name": "v_b", "download": 0},
|
1002
|
+
"water_temp_bottom": {"simple_name": "temp_b", "download": 0},
|
1003
|
+
"salinity_bottom": {"simple_name": "salt_b", "download": 0},
|
963
1004
|
}
|
964
1005
|
|
965
1006
|
var_list = []
|
966
1007
|
for var_name in download_dict.keys():
|
967
|
-
if download_dict[var_name][
|
1008
|
+
if download_dict[var_name]["download"] == 1:
|
968
1009
|
var_list.append(var_name)
|
969
1010
|
|
970
1011
|
# set depth or level, only one can be True
|
@@ -978,14 +1019,14 @@ if __name__ == '__main__':
|
|
978
1019
|
|
979
1020
|
download_switch, single_var = True, False
|
980
1021
|
combine_switch = False
|
981
|
-
copy_switch, copy_dir = False, r
|
1022
|
+
copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
|
982
1023
|
|
983
1024
|
if download_switch:
|
984
1025
|
if single_var:
|
985
1026
|
for var_name in var_list:
|
986
|
-
download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict[
|
1027
|
+
download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
|
987
1028
|
else:
|
988
|
-
download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict[
|
1029
|
+
download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
|
989
1030
|
|
990
1031
|
""" if combine_switch or copy_switch:
|
991
1032
|
time_list = get_time_list(time_s, time_e, 3, 'hour')
|