oafuncs-0.0.98.4-py3-none-any.whl → oafuncs-0.0.98.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_down/hycom_3hourly.py +93 -109
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/METADATA +1 -1
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/RECORD +6 -8
- oafuncs/oa_down/hycom_3hourly_20250407.py +0 -1295
- oafuncs/oa_down/hycom_3hourly_20250416.py +0 -1191
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/licenses/LICENSE.txt +0 -0
- {oafuncs-0.0.98.4.dist-info → oafuncs-0.0.98.5.dist-info}/top_level.txt +0 -0
--- oafuncs/oa_down/hycom_3hourly_20250416.py
+++ /dev/null
@@ -1,1191 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-"""
-Author: Liu Kun && 16031215@qq.com
-Date: 2025-04-16 11:36:15
-LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2025-04-16 11:36:16
-FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly copy.py
-Description:
-EditPlatform: vscode
-ComputerInfo: XPS 15 9510
-SystemInfo: Windows 11
-Python Version: 3.12
-"""
-
-
-
-import datetime
-import os
-import random
-import re
-import time
-import warnings
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from pathlib import Path
-from threading import Lock
-
-import matplotlib.pyplot as plt
-import netCDF4 as nc
-import numpy as np
-import pandas as pd
-import requests
-import xarray as xr
-from rich import print
-from rich.progress import Progress
-
-from oafuncs.oa_down.idm import downloader as idm_downloader
-from oafuncs.oa_down.user_agent import get_ua
-from oafuncs.oa_file import file_size
-from oafuncs.oa_nc import check as check_nc
-from oafuncs.oa_nc import modify as modify_nc
-
-warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
-
-__all__ = ["draw_time_range", "download"]
-
-
-def _get_initial_data():
-    global variable_info, data_info, var_group, single_var_group
-    # ----------------------------------------------
-    # variable
-    variable_info = {
-        "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
-        "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
-        "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
-        "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
-        "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
-        "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
-        "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
-        "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
-        "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
-    }
-    # ----------------------------------------------
-    # time resolution
-    data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
-
-    # hourly data
-    # dataset: GLBv0.08, GLBu0.08, GLBy0.08
-    data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
-
-    # version
-    # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
-    # version of GLBu0.08: 93.0
-    data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
-    # version of GLBy0.08: 93.0
-    data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
-    # version of ESPC_D: V02
-    data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
-
-    # info details
-    # time range
-    # GLBv0.08
-    # Submitting an out-of-range time on the website returns the dataset's actual time range, which was used to correct the ranges below
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
-    # GLBu0.08
-    data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
-    # GLBy0.08
-    data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
-    # ESPC-D
-    data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
-
-    # classification method
-    # year_different: the data of different years is stored in different files
-    # same_path: the data of different years is stored in the same file
-    # var_different: the data of different variables is stored in different files
-    # var_year_different: the data of different variables and years is stored in different files
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
-    data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
-    data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
-    data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
-
-    # download info
-    # base url
-    # GLBv0.08 53.X
-    url_53x = {}
-    for y_53x in range(1994, 2016):
-        # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
-        url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
-    # GLBv0.08 56.3
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
-    # GLBv0.08 57.2
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
-    # GLBv0.08 92.8
-    url_928 = {
-        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
-        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
-        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
-    }
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
-    # GLBv0.08 57.7
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
-    # GLBv0.08 92.9
-    url_929 = {
-        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
-        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
-        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
-    }
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
-    # GLBv0.08 93.0
-    url_930_v = {
-        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
-        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
-        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
-    }
-    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
-    # GLBu0.08 93.0
-    url_930_u = {
-        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
-        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
-        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
-    }
-    data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
-    # GLBy0.08 93.0
-    uv3z_930_y = {}
-    ts3z_930_y = {}
-    ssh_930_y = {}
-    for y_930_y in range(2018, 2025):
-        uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
-        ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
-        ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
-    # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
-    url_930_y = {
-        "uv3z": uv3z_930_y,
-        "ts3z": ts3z_930_y,
-        "ssh": ssh_930_y,
-    }
-    data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
-    # ESPC-D-V02
-    u3z_espc_d_v02_y = {}
-    v3z_espc_d_v02_y = {}
-    t3z_espc_d_v02_y = {}
-    s3z_espc_d_v02_y = {}
-    ssh_espc_d_v02_y = {}
-    for y_espc_d_v02 in range(2024, 2030):
-        u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
-        v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
-        t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
-        s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
-        ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
-    url_espc_d_v02_y = {
-        "u3z": u3z_espc_d_v02_y,
-        "v3z": v3z_espc_d_v02_y,
-        "t3z": t3z_espc_d_v02_y,
-        "s3z": s3z_espc_d_v02_y,
-        "ssh": ssh_espc_d_v02_y,
-    }
-    data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["url"] = url_espc_d_v02_y
-    # ----------------------------------------------
-    var_group = {
-        "uv3z": ["u", "v", "u_b", "v_b"],
-        "ts3z": ["temp", "salt", "temp_b", "salt_b"],
-        "ssh": ["ssh"],
-    }
-    # ----------------------------------------------
-    single_var_group = {
-        "u3z": ["u"],
-        "v3z": ["v"],
-        "t3z": ["temp"],
-        "s3z": ["salt"],
-        "ssh": ["ssh"],
-    }
-
-    return variable_info, data_info, var_group, single_var_group
-
-
-def draw_time_range(pic_save_folder=None):
-    if pic_save_folder is not None:
-        os.makedirs(pic_save_folder, exist_ok=True)
-    # Converting the data into a format suitable for plotting
-    data = []
-    for dataset, versions in data_info["hourly"]["dataset"].items():
-        for version, time_range in versions["version"].items():
-            t_s = time_range["time_range"]["time_start"]
-            t_e = time_range["time_range"]["time_end"]
-            if len(t_s) == 8:
-                t_s = t_s + "00"
-            if len(t_e) == 8:
-                t_e = t_e + "21"
-            t_s, t_e = t_s + "0000", t_e + "0000"
-            data.append(
-                {
-                    "dataset": dataset,
-                    "version": version,
-                    "start_date": pd.to_datetime(t_s),
-                    "end_date": pd.to_datetime(t_e),
-                }
-            )
-
-    # Creating a DataFrame
-    df = pd.DataFrame(data)
-
-    # Plotting with combined labels for datasets and versions on the y-axis
-    plt.figure(figsize=(12, 6))
-
-    # Combined labels for datasets and versions
-    combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df["dataset"], df["version"])]
-
-    colors = plt.cm.viridis(np.linspace(0, 1, len(combined_labels)))
-
-    # Assigning a color to each combined label
-    label_colors = {label: colors[i] for i, label in enumerate(combined_labels)}
-
-    # Plotting each time range
-    k = 1
-    for _, row in df.iterrows():
-        plt.plot([row["start_date"], row["end_date"]], [k, k], color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
-        # plt.text(row['end_date'], k,
-        #          f"{row['version']}", ha='right', color='black')
-        ymdh_s = row["start_date"].strftime("%Y-%m-%d %H")
-        ymdh_e = row["end_date"].strftime("%Y-%m-%d %H")
-        # if k == 1 or k == len(combined_labels):
-        if k == 1:
-            plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="left", color="black")
-            plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="right", color="black")
-        else:
-            plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="right", color="black")
-            plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="left", color="black")
-        k += 1
-
-    # Setting the y-axis labels
-    plt.yticks(range(1, len(combined_labels) + 1), combined_labels)
-    plt.xlabel("Time")
-    plt.ylabel("Dataset - Version")
-    plt.title("Time Range of Different Versions of Datasets")
-    plt.xticks(rotation=45)
-    plt.grid(True)
-    plt.tight_layout()
-    if pic_save_folder:
-        plt.savefig(Path(pic_save_folder) / "HYCOM_time_range.png")
-        print(f"[bold green]HYCOM_time_range.png has been saved in {pic_save_folder}")
-    else:
-        plt.savefig("HYCOM_time_range.png")
-        print("[bold green]HYCOM_time_range.png has been saved in the current folder")
-        print(f"Current folder: {os.getcwd()}")
-    # plt.show()
-    plt.close()
-
-
-def _get_time_list(time_s, time_e, delta, interval_type="hour"):
-    """
-    Description: get a list of time strings from time_s to time_e with a specified interval
-    Args:
-        time_s: start time string, e.g. '2023080203' for hours or '20230802' for days
-        time_e: end time string, e.g. '2023080303' for hours or '20230803' for days
-        delta: interval of hours or days
-        interval_type: 'hour' for hour interval, 'day' for day interval
-    Returns:
-        dt_list: a list of time strings
-    """
-    time_s, time_e = str(time_s), str(time_e)
-    if interval_type == "hour":
-        time_format = "%Y%m%d%H"
-        delta_type = "hours"
-    elif interval_type == "day":
-        time_format = "%Y%m%d"
-        delta_type = "days"
-        # Ensure time strings are in the correct format for days
-        time_s = time_s[:8]
-        time_e = time_e[:8]
-    else:
-        raise ValueError("interval_type must be 'hour' or 'day'")
-
-    dt = datetime.datetime.strptime(time_s, time_format)
-    dt_list = []
-    while dt.strftime(time_format) <= time_e:
-        dt_list.append(dt.strftime(time_format))
-        dt += datetime.timedelta(**{delta_type: delta})
-    return dt_list
-
-
-def _transform_time(time_str):
-    # old_time = '2023080203'
-    # time_new = '2023-08-02T03%3A00%3A00Z'
-    time_new = f"{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z"
-    return time_new
-
-
-def _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
-    query_dict = {
-        "var": variable_info[var]["var_name"],
-        "north": lat_max,
-        "west": lon_min,
-        "east": lon_max,
-        "south": lat_min,
-        "horizStride": 1,
-        "time": None,
-        "time_start": None,
-        "time_end": None,
-        "timeStride": None,
-        "vertCoord": None,
-        "vertStride": None,
-        "addLatLon": "true",
-        "accept": "netcdf4",
-    }
-
-    if time_str_end is not None:
-        query_dict["time_start"] = _transform_time(time_str_ymdh)
-        query_dict["time_end"] = _transform_time(time_str_end)
-        query_dict["timeStride"] = 1
-    else:
-        query_dict["time"] = _transform_time(time_str_ymdh)
-
-    def get_nearest_level_index(depth):
-        level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
-        return min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - depth))
-
-    if var not in ["ssh", "u_b", "v_b", "temp_b", "salt_b"] and var in ["u", "v", "temp", "salt"]:
-        if mode == "depth":
-            if depth < 0 or depth > 5000:
-                print("Please ensure the depth is in the range of 0-5000 m")
-            query_dict["vertCoord"] = get_nearest_level_index(depth) + 1
-        elif mode == "level":
-            if level_num < 1 or level_num > 40:
-                print("Please ensure the level_num is in the range of 1-40")
-            query_dict["vertCoord"] = max(1, min(level_num, 40))
-        elif mode == "full":
-            query_dict["vertStride"] = 1
-        else:
-            raise ValueError("Invalid mode. Choose from 'depth', 'level', or 'full'")
-
-    query_dict = {k: v for k, v in query_dict.items() if v is not None}
-
-    return query_dict
-
-
-def _check_time_in_dataset_and_version(time_input, time_end=None):
-    # Determine whether we are handling a single time point or a time range
-    is_single_time = time_end is None
-
-    # For a single time point, initialize the time range
-    if is_single_time:
-        time_start = int(time_input)
-        time_end = time_start
-        time_input_str = str(time_input)
-    else:
-        time_start = int(time_input)
-        time_end = int(time_end)
-        time_input_str = f"{time_input}-{time_end}"
-
-    # Pad the time strings to the full format according to their length
-    if len(str(time_start)) == 8:
-        time_start = str(time_start) + "00"
-    if len(str(time_end)) == 8:
-        time_end = str(time_end) + "21"
-    time_start, time_end = int(time_start), int(time_end)
-
-    d_list = []
-    v_list = []
-    trange_list = []
-    have_data = False
-
-    # Iterate over datasets and versions
-    for dataset_name in data_info["hourly"]["dataset"].keys():
-        for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
-            time_s, time_e = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
-            time_s, time_e = str(time_s), str(time_e)
-            if len(time_s) == 8:
-                time_s = time_s + "00"
-            if len(time_e) == 8:
-                time_e = time_e + "21"
-            # Check whether the time falls within the dataset's time range
-            if is_single_time:
-                if time_start >= int(time_s) and time_start <= int(time_e):
-                    d_list.append(dataset_name)
-                    v_list.append(version_name)
-                    trange_list.append(f"{time_s}-{time_e}")
-                    have_data = True
-            else:
-                if time_start >= int(time_s) and time_end <= int(time_e):
-                    d_list.append(dataset_name)
-                    v_list.append(version_name)
-                    trange_list.append(f"{time_s}-{time_e}")
-                    have_data = True
-
-    # Report the result
-    if match_time is None:
-        print(f"[bold red]{time_input_str} is in the following dataset and version:")
-    if have_data:
-        if match_time is None:
-            for d, v, trange in zip(d_list, v_list, trange_list):
-                print(f"[bold blue]{d} {v} {trange}")
-        if is_single_time:
-            return True
-        else:
-            base_url_s = _get_base_url(d_list[0], v_list[0], "u", str(time_start))
-            base_url_e = _get_base_url(d_list[0], v_list[0], "u", str(time_end))
-            if base_url_s == base_url_e:
-                return True
-            else:
-                print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
-                return False
-    else:
-        print(f"[bold red]{time_input_str} is not in any dataset and version")
-        return False
-
-
-def _ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
-    # Pad the time strings to the full format according to their length
-    if len(str(time_input)) == 8:
-        time_input = str(time_input) + "00"
-    time_start = int(time_input)
-    if time_end is not None:
-        if len(str(time_end)) == 8:
-            time_end = str(time_end) + "21"
-        time_end = int(time_end)
-    else:
-        time_end = time_start
-
-    # Check that the specified dataset and version exist
-    if dataset_name not in data_info["hourly"]["dataset"]:
-        print(f"[bold red]Dataset {dataset_name} not found.")
-        return False
-    if version_name not in data_info["hourly"]["dataset"][dataset_name]["version"]:
-        print(f"[bold red]Version {version_name} not found in dataset {dataset_name}.")
-        return False
-
-    # Get the time range of the specified dataset and version
-    time_range = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"]
-    time_s, time_e = list(time_range.values())
-    time_s, time_e = str(time_s), str(time_e)
-    if len(time_s) == 8:
-        time_s = time_s + "00"
-    if len(time_e) == 8:
-        time_e = time_e + "21"
-    time_s, time_e = int(time_s), int(time_e)
-
-    # Check whether the time falls within the specified dataset and version's range
-    if time_start >= time_s and time_end <= time_e:
-        print(f"[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.")
-        return True
-    else:
-        print(f"[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.")
-        return False
-
-
-def _direct_choose_dataset_and_version(time_input, time_end=None):
-    # data_info is assumed to be a dict holding dataset and version information
-    # Example structure: data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
-
-    if len(str(time_input)) == 8:
-        time_input = str(time_input) + "00"
-
-    # If time_end is None, fall back to time_input
-    if time_end is None:
-        time_end = time_input
-
-    # Normalize start and end times to the full ymdh format
-    time_start, time_end = int(str(time_input)[:10]), int(str(time_end)[:10])
-
-    dataset_name_out, version_name_out = None, None
-
-    for dataset_name in data_info["hourly"]["dataset"].keys():
-        for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
-            [time_s, time_e] = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
-            time_s, time_e = str(time_s), str(time_e)
-            if len(time_s) == 8:
-                time_s = time_s + "00"
-            if len(time_e) == 8:
-                time_e = time_e + "21"
-            time_s, time_e = int(time_s), int(time_e)
-
-            # Check whether the time falls within this dataset version's range
-            if time_start >= time_s and time_end <= time_e:
-                dataset_name_out, version_name_out = dataset_name, version_name
-
-    if dataset_name_out is not None and version_name_out is not None:
-        if match_time is None:
-            print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
-
-    # If no matching dataset and version are found, None is returned
-    return dataset_name_out, version_name_out
-
-
-def _get_base_url(dataset_name, version_name, var, ymdh_str):
-    year_str = int(ymdh_str[:4])
-    url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
-    classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
-    if classification_method == "year_different":
-        base_url = url_dict[str(year_str)]
-    elif classification_method == "same_path":
-        base_url = url_dict
-    elif classification_method == "var_different":
-        base_url = None
-        for key, value in var_group.items():
-            if var in value:
-                base_url = url_dict[key]
-                break
-        if base_url is None:
-            print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
-    elif classification_method == "var_year_different":
-        if dataset_name == "GLBy0.08" and version_name == "93.0":
-            mdh_str = ymdh_str[4:]
-            # GLBy0.08 93.0
-            # data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
-            if "010100" <= mdh_str <= "010109":
-                year_str = int(ymdh_str[:4]) - 1
-            else:
-                year_str = int(ymdh_str[:4])
-        base_url = None
-        for key, value in var_group.items():
-            if var in value:
-                base_url = url_dict[key][str(year_str)]
-                break
-        if base_url is None:
-            print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
-    elif classification_method == "single_var_year_different":
-        base_url = None
-        if dataset_name == "ESPC_D" and version_name == "V02":
-            mdh_str = ymdh_str[4:]
-            # ESPC-D-V02
-            if "010100" <= mdh_str <= "010109":
-                year_str = int(ymdh_str[:4]) - 1
-            else:
-                year_str = int(ymdh_str[:4])
-            for key, value in single_var_group.items():
-                if var in value:
-                    base_url = url_dict[key][str(year_str)]
-                    break
-        if base_url is None:
-            print("Please ensure the var is in [u,v,temp,salt,ssh]")
-    return base_url
-
-
-def _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
-    base_url = _get_base_url(dataset_name, version_name, var, ymdh_str)
-    if isinstance(query_dict["var"], str):
-        query_dict["var"] = [query_dict["var"]]
-    target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
-    return target_url
-
-
-def _clear_existing_file(file_full_path):
-    if os.path.exists(file_full_path):
-        os.remove(file_full_path)
-        print(f"{file_full_path} has been removed")
-
-
-def _check_existing_file(file_full_path, avg_size):
-    if os.path.exists(file_full_path):
-        print(f"[bold #FFA54F]{file_full_path} exists")
-        fsize = file_size(file_full_path)
-        delta_size_ratio = (fsize - avg_size) / avg_size
-        if abs(delta_size_ratio) > 0.025:
-            if check_nc(file_full_path):
-                return True
-            else:
-                # print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
-                return False
-        else:
-            return True
-    else:
-        return False
-
-
-def _get_mean_size_move(same_file, current_file):
-    with fsize_dict_lock:
-        if same_file not in fsize_dict.keys():
-            fsize_dict[same_file] = {"size_list": [], "mean_size": 1.0}
-
-        tolerance_ratio = 0.025
-        current_file_size = file_size(current_file)
-
-        if fsize_dict[same_file]["size_list"]:
-            fsize_dict[same_file]["mean_size"] = sum(fsize_dict[same_file]["size_list"]) / len(fsize_dict[same_file]["size_list"])
-            fsize_dict[same_file]["mean_size"] = max(fsize_dict[same_file]["mean_size"], 1.0)
-        else:
-            fsize_dict[same_file]["mean_size"] = 1.0
-
-        size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
-
-        if abs(size_difference_ratio) > tolerance_ratio:
-            if check_nc(current_file, print_messages=False):
-                fsize_dict[same_file]["size_list"] = [current_file_size]
-                fsize_dict[same_file]["mean_size"] = current_file_size
-            else:
-                _clear_existing_file(current_file)
-                # print(f"File size is abnormal, may need to be downloaded again, file size: {current_file_size:.2f} KB")
-        else:
-            fsize_dict[same_file]["size_list"].append(current_file_size)
-
-        return fsize_dict[same_file]["mean_size"]
-
-
-def _check_ftime(nc_file, tname="time", if_print=False):
-    if not os.path.exists(nc_file):
-        return False
-    nc_file = str(nc_file)
-    try:
-        ds = xr.open_dataset(nc_file)
-        real_time = ds[tname].values[0]
-        ds.close()
-        real_time = str(real_time)[:13]
-        real_time = real_time.replace("-", "").replace("T", "")
-        f_time = re.findall(r"\d{10}", nc_file)[0]
-        if real_time == f_time:
-            return True
-        else:
-            if if_print:
-                print(f"[bold #daff5c]File time error, file/real time: [bold blue]{f_time}/{real_time}")
-            return False
-    except Exception as e:
-        if if_print:
-            print(f"[bold #daff5c]File time check failed, {nc_file}: {e}")
-        return False
-
-
-def _correct_time(nc_file):
-    dataset = nc.Dataset(nc_file)
-    time_units = dataset.variables["time"].units
-    dataset.close()
-    origin_str = time_units.split("since")[1].strip()
-    origin_datetime = datetime.datetime.strptime(origin_str, "%Y-%m-%d %H:%M:%S")
-    given_date_str = re.findall(r"\d{10}", str(nc_file))[0]
-    given_datetime = datetime.datetime.strptime(given_date_str, "%Y%m%d%H")
-    time_difference = (given_datetime - origin_datetime).total_seconds()
-    if "hours" in time_units:
-        time_difference /= 3600
-    elif "days" in time_units:
-        time_difference /= 3600 * 24
-    modify_nc(nc_file, "time", None, time_difference)
-
-
-def _download_file(target_url, store_path, file_name, cover=False):
-    fname = Path(store_path) / file_name
-    file_name_split = file_name.split("_")
-    file_name_split = file_name_split[:-1]
-    same_file = "_".join(file_name_split) + "*nc"
-
-    if match_time is not None:
-        if check_nc(fname, print_messages=False):
-            if not _check_ftime(fname, if_print=True):
-                if match_time:
-                    _correct_time(fname)
-                    count_dict["skip"] += 1
-                else:
-                    _clear_existing_file(fname)
-                    count_dict["no_data"] += 1
-            else:
-                count_dict["skip"] += 1
-                print(f"[bold green]{file_name} is correct")
-        return
-
-    if not cover and os.path.exists(fname):
-        print(f"[bold #FFA54F]{fname} exists, skipping ...")
-        count_dict["skip"] += 1
-        return
-
-    if same_file not in fsize_dict.keys():
-        check_nc(fname, delete_if_invalid=True, print_messages=False)
-
-    get_mean_size = _get_mean_size_move(same_file, fname)
-
-    if _check_existing_file(fname, get_mean_size):
-        count_dict["skip"] += 1
-        return
-
-    _clear_existing_file(fname)
-
-    if not use_idm:
-        print(f"[bold #f0f6d0]Requesting {file_name} ...")
-        s = requests.Session()
-        download_success = False
-        request_times = 0
-
-        def calculate_wait_time(time_str, target_url):
-            time_pattern = r"\d{10}"
-            times_in_str = re.findall(time_pattern, time_str)
-            num_times_str = len(times_in_str)
-
-            if num_times_str > 1:
-                delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
-                delta_t = delta_t.total_seconds() / 3600
-                delta_t = delta_t / 3 + 1
-            else:
-                delta_t = 1
-            num_var = int(target_url.count("var="))
-            if num_var <= 0:
-                num_var = 1
-            return int(delta_t * 5 * 60 * num_var)
-
-        max_timeout = calculate_wait_time(file_name, target_url)
-        print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-
-        download_time_s = datetime.datetime.now()
-        order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
-        while not download_success:
-            if request_times >= 10:
-                print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
-                count_dict["fail"] += 1
-                break
-            if request_times > 0:
-                print(f"[bold #ffe5c0]Retrying the {order_list[request_times - 1]} time...")
-            try:
-                referer_center = target_url.split("?")[0].split("ncss/")[-1]
-                headers = {
-                    "User-Agent": get_ua(),  # the remaining headers are optional; downloads work without them
-                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
-                    "Accept-Encoding": "gzip, deflate, br, zstd",
-                    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
-                    "Referer": rf"https://ncss.hycom.org/thredds/ncss/grid/{referer_center}/dataset.html",
-                }
-                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))
-                response.raise_for_status()
-                with open(fname, "wb") as f:
-                    print(f"[bold #96cbd7]Downloading {file_name} ...")
-                    for chunk in response.iter_content(chunk_size=1024):
-                        if chunk:
-                            f.write(chunk)
-
-                f.close()
-
-                if os.path.exists(fname):
-                    download_success = True
-                    download_time_e = datetime.datetime.now()
-                    download_delta = download_time_e - download_time_s
-                    print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
-                    count_dict["success"] += 1
-
-            except requests.exceptions.HTTPError as errh:
-                print(f"Http Error: {errh}")
-            except requests.exceptions.ConnectionError as errc:
-                print(f"Error Connecting: {errc}")
-            except requests.exceptions.Timeout as errt:
-                print(f"Timeout Error: {errt}")
-            except requests.exceptions.RequestException as err:
-                print(f"Oops: Something Else: {err}")
-
-            time.sleep(3)
-            request_times += 1
-    else:
-        idm_downloader(target_url, store_path, file_name, given_idm_engine)
-        idm_download_list.append(fname)
-        print(f"[bold #3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been submitted to IDM for downloading")
-
-
-def _check_hour_is_valid(ymdh_str):
-    hh = int(str(ymdh_str[-2:]))
-    if hh in [0, 3, 6, 9, 12, 15, 18, 21]:
-        return True
-    else:
-        return False
-
-
-def _check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
-    if dataset_name is not None and version_name is not None:
-        just_ensure = _ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
-        if just_ensure:
-            return dataset_name, version_name
-        else:
-            return None, None
-
-    download_time_str = str(download_time)
-
-    if len(download_time_str) == 8:
-        download_time_str = download_time_str + "00"
-
-    if download_time_end is None and not _check_hour_is_valid(download_time_str):
-        print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
-        raise ValueError("The hour is invalid")
-
-    if download_time_end is not None:
-        if len(str(download_time_end)) == 8:
-            download_time_end = str(download_time_end) + "21"
-        have_data = _check_time_in_dataset_and_version(download_time_str, download_time_end)
-        if have_data:
-            return _direct_choose_dataset_and_version(download_time_str, download_time_end)
-    else:
-        have_data = _check_time_in_dataset_and_version(download_time_str)
-        if have_data:
-            return _direct_choose_dataset_and_version(download_time_str)
-
-    return None, None
-
-
-def _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
-    ymdh_str = str(download_time)
-    if depth is not None and level_num is not None:
-        print("Please ensure the depth or level_num is None")
-        print("Progress will use the depth")
-        which_mode = "depth"
-    elif depth is not None and level_num is None:
-        print(f"Data of single depth (~{depth} m) will be downloaded...")
-        which_mode = "depth"
-    elif level_num is not None and depth is None:
-        print(f"Data of single level ({level_num}) will be downloaded...")
-        which_mode = "level"
-    else:
-        which_mode = "full"
-    query_dict = _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
-    submit_url = _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
-    return submit_url
-
-
-def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, cover=False):
-    print("[bold #ecdbfe]-" * mark_len)
-    download_time = str(download_time)
-    if download_time_end is not None:
-        download_time_end = str(download_time_end)
-        dataset_name, version_name = _check_dataset_version(dataset_name, version_name, download_time, download_time_end)
-    else:
-        dataset_name, version_name = _check_dataset_version(dataset_name, version_name, download_time)
-    if dataset_name is None and version_name is None:
-        count_dict["no_data"] += 1
-        if download_time_end is not None:
-            count_dict["no_data_list"].append(f"{download_time}-{download_time_end}")
-        else:
-            count_dict["no_data_list"].append(download_time)
-        return
-
-    if isinstance(var, str):
-        var = [var]
-
-    if isinstance(var, list):
-        if len(var) == 1:
-            var = var[0]
-            submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
-            file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
-            if download_time_end is not None:
-                file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"
-            _download_file(submit_url, store_path, file_name, cover)
-        else:
-            if download_time < "2024081012":
-                varlist = [_ for _ in var]
-                for key, value in var_group.items():
-                    current_group = []
-                    for v in varlist:
-                        if v in value:
-                            current_group.append(v)
-                    if len(current_group) == 0:
-                        continue
-
-                    var = current_group[0]
-                    submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
-                    file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
-                    old_str = f"var={variable_info[var]['var_name']}"
-                    new_str = f"var={variable_info[var]['var_name']}"
-                    if len(current_group) > 1:
-                        for v in current_group[1:]:
-                            new_str = f"{new_str}&var={variable_info[v]['var_name']}"
-                        submit_url = submit_url.replace(old_str, new_str)
-                    file_name = f"HYCOM_{key}_{download_time}.nc"
-                    if download_time_end is not None:
-                        file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"
-                    _download_file(submit_url, store_path, file_name, cover)
-            else:
-                for v in var:
-                    submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
-                    file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
-                    if download_time_end is not None:
-                        file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
-                    _download_file(submit_url, store_path, file_name, cover)
-
-
-def _convert_full_name_to_short_name(full_name):
-    for var, info in variable_info.items():
-        if full_name == info["var_name"] or full_name == info["standard_name"] or full_name == var:
-            return var
-    print("[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b")
-    print("or")
-    print("[bold blue]water_u, water_v, water_temp, salinity, surf_el, water_u_bottom, water_v_bottom, water_temp_bottom, salinity_bottom")
-    return False
-
-
-def _download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover):
-    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, cover)
-
-
-def _done_callback(future, progress, task, total, counter_lock):
-    global parallel_counter
-    with counter_lock:
-        parallel_counter += 1
-        progress.update(task, advance=1, description=f"[cyan]{bar_desc} {parallel_counter}/{total}")
-
-
-def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, cover=False, interval_hour=3):
-    ymdh_time_s, ymdh_time_e = str(time_s), str(time_e)
-    if num_workers is not None and num_workers > 1:
-        global parallel_counter
-        parallel_counter = 0
-        counter_lock = Lock()
-    if ymdh_time_s == ymdh_time_e:
-        _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, cover)
-    elif int(ymdh_time_s) < int(ymdh_time_e):
-        if match_time is None:
-            print("*" * mark_len)
-            print("Downloading a series of files...")
-        time_list = _get_time_list(ymdh_time_s, ymdh_time_e, interval_hour, "hour")
-        with Progress() as progress:
-            task = progress.add_task(f"[cyan]{bar_desc}", total=len(time_list))
-            if num_workers is None or num_workers <= 1:
-                for i, time_str in enumerate(time_list):
-                    _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, cover)
-                    progress.update(task, advance=1, description=f"[cyan]{bar_desc} {i + 1}/{len(time_list)}")
-            else:
-                with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, cover) for time_str in time_list]
-                    for feature in as_completed(futures):
-                        _done_callback(feature, progress, task, len(time_list), counter_lock)
-    else:
-        print("[bold red]Please ensure the time_s is no more than time_e")
-
-
-def download(
-    variables,
-    start_time,
-    end_time=None,
-    lon_min=0,
-    lon_max=359.92,
-    lat_min=-80,
-    lat_max=90,
-    depth=None,
-    level=None,
-    output_dir=None,
-    dataset=None,
-    version=None,
-    workers=None,
-    overwrite=False,
-    idm_path=None,
-    validate_time=None,
-    interval_hours=3,
-):
-    """
-    Download data for a single time or a series of times.
-
-    Parameters:
-        variables (str or list): Variable names to download. Examples include:
-            'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b'
-            or their full names like 'water_u', 'water_v', etc.
-        start_time (str): Start time in the format 'YYYYMMDDHH' or 'YYYYMMDD'.
-            If hour is included, it must be one of [00, 03, 06, 09, 12, 15, 18, 21].
-        end_time (str, optional): End time in the format 'YYYYMMDDHH' or 'YYYYMMDD'.
-            If not provided, only data for the start_time will be downloaded.
-        lon_min (float, optional): Minimum longitude. Default is 0.
-        lon_max (float, optional): Maximum longitude. Default is 359.92.
-        lat_min (float, optional): Minimum latitude. Default is -80.
-        lat_max (float, optional): Maximum latitude. Default is 90.
-        depth (float, optional): Depth in meters. If specified, data for a single depth
-            will be downloaded. Suggested range: [0, 5000].
-        level (int, optional): Vertical level number. If specified, data for a single
-            level will be downloaded. Suggested range: [1, 40].
-        output_dir (str, optional): Directory to save downloaded files. If not provided,
-            files will be saved in the current working directory.
-        dataset (str, optional): Dataset name. Examples: 'GLBv0.08', 'GLBu0.08', etc.
-            If not provided, the dataset will be chosen based on the time range.
-        version (str, optional): Dataset version. Examples: '53.X', '56.3', etc.
-            If not provided, the version will be chosen based on the time range.
-        workers (int, optional): Number of parallel workers. Default is 1. Maximum is 10.
-        overwrite (bool, optional): Whether to overwrite existing files. Default is False.
-        idm_path (str, optional): Path to the Internet Download Manager (IDM) executable.
-            If provided, IDM will be used for downloading.
-        validate_time (bool, optional): Time validation mode. Default is None.
-            - None: Only download data.
-            - True: Modify the real time in the data to match the file name.
-            - False: Check if the real time matches the file name. If not, delete the file.
-        interval_hours (int, optional): Time interval in hours for downloading data.
-            Default is 3. Examples: 3, 6, etc.
-
-    Returns:
-        None
-
-    Example:
-        >>> download(
-                variables='u',
-                start_time='2024083100',
-                end_time='2024090100',
-                lon_min=0,
-                lon_max=359.92,
-                lat_min=-80,
-                lat_max=90,
-                depth=None,
-                level=None,
-                output_dir=None,
-                dataset=None,
-                version=None,
-                workers=4,
-                overwrite=False,
-                idm_path=None,
-                validate_time=None,
-                interval_hours=3,
-            )
-    """
-    from oafuncs.oa_tool import pbar
-
-    _get_initial_data()
-
-    if dataset is None and version is None:
-        if validate_time is None:
-            print("Dataset and version will be chosen based on the time range.")
-            print("If multiple datasets or versions exist, the latest one will be used.")
-    elif dataset is None:
-        print("Please provide a dataset name if specifying a version.")
-    elif version is None:
-        print("Please provide a version if specifying a dataset name.")
-    else:
-        print("Using the specified dataset and version.")
-
-    if isinstance(variables, list):
-        if len(variables) == 1:
-            variables = _convert_full_name_to_short_name(variables[0])
-        else:
-            variables = [_convert_full_name_to_short_name(v) for v in variables]
-    elif isinstance(variables, str):
-        variables = _convert_full_name_to_short_name(variables)
-    else:
-        raise ValueError("Invalid variable(s) provided.")
-    if variables is False:
-        raise ValueError("Invalid variable(s) provided.")
-    if not (0 <= lon_min <= 359.92 and 0 <= lon_max <= 359.92 and -80 <= lat_min <= 90 and -80 <= lat_max <= 90):
-        raise ValueError("Longitude or latitude values are out of range.")
-
-    if output_dir is None:
-        output_dir = str(Path.cwd())
-    else:
-        os.makedirs(output_dir, exist_ok=True)
-
-    if workers is not None:
-        workers = max(min(workers, 10), 1)
-    start_time = str(start_time)
-    if len(start_time) == 8:
-        start_time += "00"
-    if end_time is None:
-        end_time = start_time[:]
-    else:
-        end_time = str(end_time)
-        if len(end_time) == 8:
-            end_time += "21"
-
-    global count_dict
-    count_dict = {"success": 0, "fail": 0, "skip": 0, "no_data": 0, "total": 0, "no_data_list": []}
-
-    global fsize_dict
-    fsize_dict = {}
-
-    global fsize_dict_lock
-    fsize_dict_lock = Lock()
-
-    global use_idm, given_idm_engine, idm_download_list, bar_desc
-    if idm_path is not None:
-        use_idm = True
-        workers = 1
-        given_idm_engine = idm_path
-        idm_download_list = []
-        bar_desc = "Submitting to IDM ..."
-    else:
-        use_idm = False
-        bar_desc = "Downloading ..."
-
-    global match_time
-    match_time = validate_time
-
-    global mark_len
-    mark_len = 100
-
-    if validate_time is not None:
-        workers = 1
-        print("*" * mark_len)
-        print("[bold red]Only checking the time of existing files.")
-        bar_desc = "Checking time ..."
-
-    _download_hourly_func(
-        variables,
-        start_time,
-        end_time,
-        lon_min,
-        lon_max,
-        lat_min,
-        lat_max,
-        depth,
-        level,
-        output_dir,
-        dataset,
-        version,
-        workers,
-        overwrite,
-        int(interval_hours),
-    )
-
-    if idm_path is not None:
-        print("[bold #ecdbfe]*" * mark_len)
-        print(f"[bold #3dfc40]{'All files have been submitted to IDM for downloading'.center(mark_len, '*')}")
-        print("[bold #ecdbfe]*" * mark_len)
-        if idm_download_list:
-            remain_list = idm_download_list.copy()
-            for _ in pbar(range(len(idm_download_list)), cmap="diverging_1", description="Downloading: "):
-                success = False
-                while not success:
-                    for f in remain_list:
-                        if check_nc(f, print_messages=False):
-                            count_dict["success"] += 1
-                            success = True
-                            remain_list.remove(f)
-                            break
-
-    count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
-    print("[bold #ecdbfe]=" * mark_len)
-    print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}\nNo data: {count_dict['no_data']}")
-    print("[bold #ecdbfe]=" * mark_len)
-    if count_dict["fail"] > 0:
-        print("[bold #be5528]Please try again to download the failed data later.")
-    if count_dict["no_data"] > 0:
-        print(f"[bold #f90000]{count_dict['no_data']} data entries do not exist in any dataset or version.")
-        for no_data in count_dict["no_data_list"]:
-            print(f"[bold #d81b60]{no_data}")
-    print("[bold #ecdbfe]=" * mark_len)
-
-
-if __name__ == "__main__":
-    download_dict = {
-        "water_u": {"simple_name": "u", "download": 1},
-        "water_v": {"simple_name": "v", "download": 1},
-        "surf_el": {"simple_name": "ssh", "download": 1},
-        "water_temp": {"simple_name": "temp", "download": 1},
-        "salinity": {"simple_name": "salt", "download": 1},
-        "water_u_bottom": {"simple_name": "u_b", "download": 0},
-        "water_v_bottom": {"simple_name": "v_b", "download": 0},
-        "water_temp_bottom": {"simple_name": "temp_b", "download": 0},
-        "salinity_bottom": {"simple_name": "salt_b", "download": 0},
-    }
-
-    var_list = [var_name for var_name in download_dict.keys() if download_dict[var_name]["download"]]
-
-    single_var = False
-
-    options = {
-        "variables": var_list,
-        "start_time": "2025010300",
-        "end_time": "2025010309",
-        "output_dir": r"I:\Data\HYCOM\3hourly_test",
-        "lon_min": 105,
-        "lon_max": 130,
-        "lat_min": 15,
-        "lat_max": 45,
-        "workers": 1,
-        "overwrite": False,
-        "depth": None,
-        "level": None,
-        "validate_time": True,
-        "idm_path": r"D:\Programs\Internet Download Manager\IDMan.exe",
-        "interval_hours": 3,
-    }
-
-    if single_var:
-        for var_name in var_list:
-            options["variables"] = var_name
-            download(**options)
-    else:
-        download(**options)