oafuncs 0.0.81__py2.py3-none-any.whl → 0.0.83__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (44)
  1. oafuncs/data_store/OAFuncs.png +0 -0
  2. oafuncs/oa_cmap.py +1 -0
  3. oafuncs/oa_data.py +107 -28
  4. oafuncs/oa_down/__init__.py +5 -4
  5. oafuncs/oa_down/hycom_3hourly.py +152 -35
  6. oafuncs/oa_down/user_agent.py +34 -0
  7. oafuncs/oa_draw.py +165 -103
  8. oafuncs/oa_file.py +66 -53
  9. oafuncs/oa_help.py +19 -16
  10. oafuncs/oa_nc.py +82 -114
  11. oafuncs-0.0.83.dist-info/METADATA +91 -0
  12. oafuncs-0.0.83.dist-info/RECORD +26 -0
  13. oafuncs/oa_down/test.py +0 -151
  14. oafuncs/oa_s/__init__.py +0 -23
  15. oafuncs/oa_s/oa_cmap.py +0 -163
  16. oafuncs/oa_s/oa_data.py +0 -187
  17. oafuncs/oa_s/oa_draw.py +0 -451
  18. oafuncs/oa_s/oa_file.py +0 -332
  19. oafuncs/oa_s/oa_help.py +0 -39
  20. oafuncs/oa_s/oa_nc.py +0 -410
  21. oafuncs/oa_s/oa_python.py +0 -107
  22. "oafuncs - хЙпцЬм/__init__.py" +0 -26
  23. "oafuncs - хЙпцЬм/oa_cmap.py" +0 -163
  24. "oafuncs - хЙпцЬм/oa_data.py" +0 -187
  25. "oafuncs - хЙпцЬм/oa_down/__init__.py" +0 -20
  26. "oafuncs - хЙпцЬм/oa_down/hycom_3hourly.py" +0 -1176
  27. "oafuncs - хЙпцЬм/oa_down/literature.py" +0 -332
  28. "oafuncs - хЙпцЬм/oa_down/test_ua.py" +0 -151
  29. "oafuncs - хЙпцЬм/oa_draw.py" +0 -451
  30. "oafuncs - хЙпцЬм/oa_file.py" +0 -332
  31. "oafuncs - хЙпцЬм/oa_help.py" +0 -39
  32. "oafuncs - хЙпцЬм/oa_nc.py" +0 -410
  33. "oafuncs - хЙпцЬм/oa_python.py" +0 -107
  34. "oafuncs - хЙпцЬм/oa_sign/__init__.py" +0 -21
  35. "oafuncs - хЙпцЬм/oa_sign/meteorological.py" +0 -168
  36. "oafuncs - хЙпцЬм/oa_sign/ocean.py" +0 -158
  37. "oafuncs - хЙпцЬм/oa_sign/scientific.py" +0 -139
  38. "oafuncs - хЙпцЬм/oa_tool/__init__.py" +0 -18
  39. "oafuncs - хЙпцЬм/oa_tool/email.py" +0 -114
  40. oafuncs-0.0.81.dist-info/METADATA +0 -918
  41. oafuncs-0.0.81.dist-info/RECORD +0 -51
  42. {oafuncs-0.0.81.dist-info → oafuncs-0.0.83.dist-info}/LICENSE.txt +0 -0
  43. {oafuncs-0.0.81.dist-info → oafuncs-0.0.83.dist-info}/WHEEL +0 -0
  44. {oafuncs-0.0.81.dist-info → oafuncs-0.0.83.dist-info}/top_level.txt +0 -0
"oafuncs - хЙпцЬм/oa_down/hycom_3hourly.py" (deleted)
@@ -1,1176 +0,0 @@
- #!/usr/bin/env python
- # coding=utf-8
- """
- Author: Liu Kun && 16031215@qq.com
- Date: 2024-11-01 10:31:09
- LastEditors: Liu Kun && 16031215@qq.com
- LastEditTime: 2024-12-08 10:20:45
- FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
- Description:
- EditPlatform: vscode
- ComputerInfo: XPS 15 9510
- SystemInfo: Windows 11
- Python Version: 3.12
- """
-
- import datetime
- import os
- import random
- import time
- import warnings
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from pathlib import Path
- from threading import Lock
-
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- import requests
- from bs4 import BeautifulSoup
- from rich import print
- from rich.progress import Progress
-
- warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
-
- __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list", "get_ua"]
-
- # time resolution
- data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
-
- # hourly data
- # dataset: GLBv0.08, GLBu0.08, GLBy0.08
- data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}}
-
- # version
- # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
- # version of GLBu0.08: 93.0
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
- # version of GLBy0.08: 93.0
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
-
- # info details
- # time range
- # GLBv0.08
- # Submitting an out-of-range time on the HYCOM website returns the dataset's actual time range, which was used to correct the ranges below.
- # So far only the GLBv0.08 93.0 range has been corrected down to the hour.
- # For the other datasets, the start/end hours default to 00 and 21.
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
- # GLBu0.08
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
- # GLBy0.08
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "20300904"}
-
- # variable
- variable_info = {
-     "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
-     "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
-     "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
-     "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
-     "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
-     "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
-     "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
-     "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
-     "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
- }
-
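For orientation, a minimal sketch of how the lookup tables above are used (editorial example; values hypothetical, assumes the dicts are in scope):

    # Resolve a short variable name to the names used in NCSS queries.
    print(variable_info["temp"]["var_name"])       # water_temp
    print(variable_info["temp"]["standard_name"])  # sea_water_potential_temperature
    # Read the hourly time range recorded for GLBv0.08 version 53.X.
    print(data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"])
    # {'time_start': '1994010112', 'time_end': '2015123109'}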
- # classification method
- # year_different: the data of different years is stored in different files
- # same_path: the data of different years is stored in the same file
- # var_different: the data of different variables is stored in different files
- # var_year_different: the data of different variables and years is stored in different files
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
-
- # download info
- # base url
- # GLBv0.08 53.X
- url_53x = {}
- for y_53x in range(1994, 2016):
-     # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
-     url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
- # GLBv0.08 56.3
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
- # GLBv0.08 57.2
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
- # GLBv0.08 92.8
- url_928 = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
- # GLBv0.08 57.7
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
- # GLBv0.08 92.9
- url_929 = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
- # GLBv0.08 93.0
- url_930_v = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
- # GLBu0.08 93.0
- url_930_u = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
- # GLBy0.08 93.0
- uv3z_930_y = {}
- ts3z_930_y = {}
- ssh_930_y = {}
- for y_930_y in range(2018, 2030):
-     uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
-     ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
-     ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
- # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
- url_930_y = {
-     "uv3z": uv3z_930_y,
-     "ts3z": ts3z_930_y,
-     "ssh": ssh_930_y,
- }
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
-
- var_group = {
-     "uv3z": ["u", "v", "u_b", "v_b"],
-     "ts3z": ["temp", "salt", "temp_b", "salt_b"],
-     "ssh": ["ssh"],
- }
-
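A small sketch of how var_group routes a variable to its URL key, mirroring the dispatch in get_base_url further down (hypothetical lookup):

    # "salt" is served by the ts3z product, so that key selects the URL.
    group = next(key for key, names in var_group.items() if "salt" in names)
    print(group)  # ts3z
    print(data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"][group])
    # https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?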
-
- def draw_time_range(pic_save_folder=None):
-     if pic_save_folder is not None:
-         os.makedirs(pic_save_folder, exist_ok=True)
-     # Converting the data into a format suitable for plotting
-     data = []
-     for dataset, versions in data_info["hourly"]["dataset"].items():
-         for version, time_range in versions["version"].items():
-             t_s = time_range["time_range"]["time_start"]
-             t_e = time_range["time_range"]["time_end"]
-             if len(t_s) == 8:
-                 t_s = t_s + "00"
-             if len(t_e) == 8:
-                 t_e = t_e + "21"
-             t_s, t_e = t_s + "0000", t_e + "0000"
-             data.append(
-                 {
-                     "dataset": dataset,
-                     "version": version,
-                     "start_date": pd.to_datetime(t_s),
-                     "end_date": pd.to_datetime(t_e),
-                 }
-             )
-
-     # Creating a DataFrame
-     df = pd.DataFrame(data)
-
-     # Plotting with combined labels for datasets and versions on the y-axis
-     plt.figure(figsize=(12, 6))
-
-     # Combined labels for datasets and versions
-     combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df["dataset"], df["version"])]
-
-     colors = plt.cm.viridis(np.linspace(0, 1, len(combined_labels)))
-
-     # Assigning a color to each combined label
-     label_colors = {label: colors[i] for i, label in enumerate(combined_labels)}
-
-     # Plotting each time range
-     k = 1
-     for _, row in df.iterrows():
-         plt.plot([row["start_date"], row["end_date"]], [k, k], color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
-         # plt.text(row['end_date'], k,
-         #          f"{row['version']}", ha='right', color='black')
-         ymdh_s = row["start_date"].strftime("%Y-%m-%d %H")
-         ymdh_e = row["end_date"].strftime("%Y-%m-%d %H")
-         if k == 1 or k == len(combined_labels):
-             plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="left", color="black")
-             plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="right", color="black")
-         else:
-             plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="right", color="black")
-             plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="left", color="black")
-         k += 1
-
-     # Setting the y-axis labels
-     plt.yticks(range(1, len(combined_labels) + 1), combined_labels)
-     plt.xlabel("Time")
-     plt.ylabel("Dataset - Version")
-     plt.title("Time Range of Different Versions of Datasets")
-     plt.xticks(rotation=45)
-     plt.grid(True)
-     plt.tight_layout()
-     if pic_save_folder:
-         plt.savefig(Path(pic_save_folder) / "HYCOM_time_range.png")
-         print(f"[bold green]HYCOM_time_range.png has been saved in {pic_save_folder}")
-     else:
-         plt.savefig("HYCOM_time_range.png")
-         print("[bold green]HYCOM_time_range.png has been saved in the current folder")
-         print(f"Current folder: {os.getcwd()}")
-     # plt.show()
-     plt.close()
-
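Typical call (editorial sketch; the folder is hypothetical):

    draw_time_range(pic_save_folder="./figs")  # writes ./figs/HYCOM_time_range.png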
-
- def get_time_list(time_s, time_e, delta, interval_type="hour"):
-     """
-     Description: get a list of time strings from time_s to time_e with a specified interval
-     Args:
-         time_s: start time string, e.g. '2023080203' for hours or '20230802' for days
-         time_e: end time string, e.g. '2023080303' for hours or '20230803' for days
-         delta: interval of hours or days
-         interval_type: 'hour' for hour interval, 'day' for day interval
-     Returns:
-         dt_list: a list of time strings
-     """
-     time_s, time_e = str(time_s), str(time_e)
-     if interval_type == "hour":
-         time_format = "%Y%m%d%H"
-         delta_type = "hours"
-     elif interval_type == "day":
-         time_format = "%Y%m%d"
-         delta_type = "days"
-         # Ensure time strings are in the correct format for days
-         time_s = time_s[:8]
-         time_e = time_e[:8]
-     else:
-         raise ValueError("interval_type must be 'hour' or 'day'")
-
-     dt = datetime.datetime.strptime(time_s, time_format)
-     dt_list = []
-     while dt.strftime(time_format) <= time_e:
-         dt_list.append(dt.strftime(time_format))
-         dt += datetime.timedelta(**{delta_type: delta})
-     return dt_list
-
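For example, one day at the native 3-hour step:

    print(get_time_list("2023080100", "2023080121", 3, "hour"))
    # ['2023080100', '2023080103', '2023080106', '2023080109',
    #  '2023080112', '2023080115', '2023080118', '2023080121']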
-
- def transform_time(time_str):
-     # old_time = '2023080203'
-     # time_new = '2023-08-02T03%3A00%3A00Z'
-     time_new = f"{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z"
-     return time_new
-
-
- def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
-     query_dict = {
-         "var": variable_info[var]["var_name"],
-         "north": lat_max,
-         "west": lon_min,
-         "east": lon_max,
-         "south": lat_min,
-         "horizStride": 1,
-         "time": None,
-         "time_start": None,
-         "time_end": None,
-         "timeStride": None,
-         "vertCoord": None,
-         "vertStride": None,
-         "addLatLon": "true",
-         "accept": "netcdf4",
-     }
-
-     if time_str_end is not None:
-         query_dict["time_start"] = transform_time(time_str_ymdh)
-         query_dict["time_end"] = transform_time(time_str_end)
-         query_dict["timeStride"] = 1
-     else:
-         query_dict["time"] = transform_time(time_str_ymdh)
-
-     def get_nearest_level_index(depth):
-         level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
-         return min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - depth))
-
-     if var not in ["ssh", "u_b", "v_b", "temp_b", "salt_b"] and var in ["u", "v", "temp", "salt"]:
-         if mode == "depth":
-             if depth < 0 or depth > 5000:
-                 print("Please ensure the depth is in the range of 0-5000 m")
-             query_dict["vertCoord"] = get_nearest_level_index(depth) + 1
-         elif mode == "level":
-             if level_num < 1 or level_num > 40:
-                 print("Please ensure the level_num is in the range of 1-40")
-             query_dict["vertCoord"] = max(1, min(level_num, 40))
-         elif mode == "full":
-             query_dict["vertStride"] = 1
-         else:
-             raise ValueError("Invalid mode. Choose from 'depth', 'level', or 'full'")
-
-     query_dict = {k: v for k, v in query_dict.items() if v is not None}
-
-     return query_dict
-
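As a concrete illustration (hypothetical call: single time, full water column for u):

    q = get_query_dict("u", 105, 130, 15, 45, "2024110112", mode="full")
    # {'var': 'water_u', 'north': 45, 'west': 105, 'east': 130, 'south': 15,
    #  'horizStride': 1, 'time': '2024-11-01T12%3A00%3A00Z', 'vertStride': 1,
    #  'addLatLon': 'true', 'accept': 'netcdf4'}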
-
- def check_time_in_dataset_and_version(time_input, time_end=None):
-     # decide whether we are handling a single time or a time range
-     is_single_time = time_end is None
-
-     # for a single time, initialize the range to that time
-     if is_single_time:
-         time_start = int(time_input)
-         time_end = time_start
-         time_input_str = str(time_input)
-     else:
-         time_start = int(time_input)
-         time_end = int(time_end)
-         time_input_str = f"{time_input}-{time_end}"
-
-     # pad YYYYMMDD times to the full YYYYMMDDHH format
-     if len(str(time_start)) == 8:
-         time_start = str(time_start) + "00"
-     if len(str(time_end)) == 8:
-         time_end = str(time_end) + "21"
-     time_start, time_end = int(time_start), int(time_end)
-
-     d_list = []
-     v_list = []
-     trange_list = []
-     have_data = False
-
-     # walk through all datasets and versions
-     for dataset_name in data_info["hourly"]["dataset"].keys():
-         for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
-             time_s, time_e = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
-             time_s, time_e = str(time_s), str(time_e)
-             if len(time_s) == 8:
-                 time_s = time_s + "00"
-             if len(time_e) == 8:
-                 time_e = time_e + "21"
-             # check whether the time falls inside this dataset's range
-             if is_single_time:
-                 if time_start >= int(time_s) and time_start <= int(time_e):
-                     d_list.append(dataset_name)
-                     v_list.append(version_name)
-                     trange_list.append(f"{time_s}-{time_e}")
-                     have_data = True
-             else:
-                 if time_start >= int(time_s) and time_end <= int(time_e):
-                     d_list.append(dataset_name)
-                     v_list.append(version_name)
-                     trange_list.append(f"{time_s}-{time_e}")
-                     have_data = True
-
-     # report the result
-     print(f"[bold red]{time_input_str} is in the following dataset and version:")
-     if have_data:
-         for d, v, trange in zip(d_list, v_list, trange_list):
-             print(f"[bold blue]{d} {v} {trange}")
-         if is_single_time:
-             return True
-         else:
-             base_url_s = get_base_url(d_list[0], v_list[0], "u", str(time_start))
-             base_url_e = get_base_url(d_list[0], v_list[0], "u", str(time_end))
-             if base_url_s == base_url_e:
-                 return True
-             else:
-                 print(f"[bold red]{time_start} to {time_end} is in different datasets or versions, so you can't download them together")
-                 return False
-     else:
-         print(f"[bold red]{time_input_str} is not in any dataset and version")
-         return False
-
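Example (hypothetical date; the matches follow the time ranges defined above):

    check_time_in_dataset_and_version(2018100100)
    # prints: GLBv0.08 93.0 2018010112-2020021909
    #         GLBu0.08 93.0 2018091912-2018120909
    # and returns True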
-
- def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
-     # pad YYYYMMDD times to the full YYYYMMDDHH format
-     if len(str(time_input)) == 8:
-         time_input = str(time_input) + "00"
-     time_start = int(time_input)
-     if time_end is not None:
-         if len(str(time_end)) == 8:
-             time_end = str(time_end) + "21"
-         time_end = int(time_end)
-     else:
-         time_end = time_start
-
-     # make sure the requested dataset and version exist
-     if dataset_name not in data_info["hourly"]["dataset"]:
-         print(f"[bold red]Dataset {dataset_name} not found.")
-         return False
-     if version_name not in data_info["hourly"]["dataset"][dataset_name]["version"]:
-         print(f"[bold red]Version {version_name} not found in dataset {dataset_name}.")
-         return False
-
-     # get the time range of the requested dataset and version
-     time_range = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"]
-     time_s, time_e = list(time_range.values())
-     time_s, time_e = str(time_s), str(time_e)
-     if len(time_s) == 8:
-         time_s = time_s + "00"
-     if len(time_e) == 8:
-         time_e = time_e + "21"
-     time_s, time_e = int(time_s), int(time_e)
-
-     # check whether the time falls inside that range
-     if time_start >= time_s and time_end <= time_e:
-         print(f"[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.")
-         return True
-     else:
-         print(f"[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.")
-         return False
-
-
- def direct_choose_dataset_and_version(time_input, time_end=None):
-     # data_info is the dict defined above with dataset and version information
-     # structure: data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
-
-     if len(str(time_input)) == 8:
-         time_input = str(time_input) + "00"
-
-     # if time_end is None, fall back to time_input
-     if time_end is None:
-         time_end = time_input
-
-     # normalize start and end to the full ymdh format
-     time_start, time_end = int(str(time_input)[:10]), int(str(time_end)[:10])
-
-     dataset_name_out, version_name_out = None, None
-
-     for dataset_name in data_info["hourly"]["dataset"].keys():
-         for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
-             [time_s, time_e] = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
-             time_s, time_e = str(time_s), str(time_e)
-             if len(time_s) == 8:
-                 time_s = time_s + "00"
-             if len(time_e) == 8:
-                 time_e = time_e + "21"
-             time_s, time_e = int(time_s), int(time_e)
-
-             # check whether the time lies inside this dataset/version's range
-             if time_start >= time_s and time_end <= time_e:
-                 # print(f'[bold purple]dataset: {dataset_name}, version: {version_name} is chosen')
-                 # return dataset_name, version_name
-                 dataset_name_out, version_name_out = dataset_name, version_name
-
-     if dataset_name_out is not None and version_name_out is not None:
-         print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
-
-     # if no dataset/version matched, None is returned
-     return dataset_name_out, version_name_out
-
-
- def get_base_url(dataset_name, version_name, var, ymdh_str):
-     year_str = int(ymdh_str[:4])
-     url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
-     classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
-     if classification_method == "year_different":
-         base_url = url_dict[str(year_str)]
-     elif classification_method == "same_path":
-         base_url = url_dict
-     elif classification_method == "var_different":
-         base_url = None
-         for key, value in var_group.items():
-             if var in value:
-                 base_url = url_dict[key]
-                 break
-         if base_url is None:
-             print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
-     elif classification_method == "var_year_different":
-         if dataset_name == "GLBy0.08" and version_name == "93.0":
-             mdh_str = ymdh_str[4:]
-             # GLBy0.08 93.0
-             # data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
-             if mdh_str <= "010109":
-                 year_str = int(ymdh_str[:4]) - 1
-         base_url = None
-         for key, value in var_group.items():
-             if var in value:
-                 base_url = url_dict[key][str(year_str)]
-                 break
-         if base_url is None:
-             print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
-     return base_url
-
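For instance, with the URL tables defined above (hypothetical inputs):

    print(get_base_url("GLBv0.08", "53.X", "u", "2010060300"))
    # https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2010?
    print(get_base_url("GLBy0.08", "93.0", "salt", "2019010106"))
    # 2019010106 is before 01-01 09:00, so it still lives in the 2018 file:
    # https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/2018?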
-
- def get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
-     base_url = get_base_url(dataset_name, version_name, var, ymdh_str)
-     if isinstance(query_dict["var"], str):
-         query_dict["var"] = [query_dict["var"]]
-     target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
-     return target_url
-
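Putting the two together (hypothetical ssh request; query string truncated):

    q = get_query_dict("ssh", 105, 130, 15, 45, "2024110112")
    url = get_submit_url("GLBy0.08", "93.0", "ssh", "2024110112", q)
    # https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/2024?var=surf_el&north=45&west=105&...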
-
- def clear_existing_file(file_full_path):
-     if os.path.exists(file_full_path):
-         os.remove(file_full_path)
-         print(f"{file_full_path} has been removed")
-
-
- def _get_file_size(file_path, unit="KB"):
-     # check that the file exists
-     if not os.path.exists(file_path):
-         return "File does not exist"
-
-     # file size in bytes
-     file_size = os.path.getsize(file_path)
-
-     # unit conversion factors
-     unit_dict = {"PB": 1024**5, "TB": 1024**4, "GB": 1024**3, "MB": 1024**2, "KB": 1024}
-
-     # validate the requested unit
-     if unit not in unit_dict:
-         return "Invalid unit; choose one of PB, TB, GB, MB, KB"
-
-     # convert the file size to the requested unit
-     converted_size = file_size / unit_dict[unit]
-
-     return converted_size
-
-
- def check_existing_file(file_full_path):
-     if os.path.exists(file_full_path):
-         print(f"[bold #FFA54F]{file_full_path} exists")
-         fsize = _get_file_size(file_full_path)
-         if fsize < 5:
-             print(f"[bold #FFA54F]{file_full_path} may be incomplete\nFile size: {fsize:.2f} KB")
-             # clear_existing_file(file_full_path)
-             return False
-         else:
-             return True
-     else:
-         # print(f'{file_full_path} does not exist')
-         return False
-
-
- def get_ua():
-     current_dir = os.path.dirname(os.path.abspath(__file__))
-     ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
-
-     with open(ua_file_txt, "r") as f:
-         ua_list = f.readlines()
-         # strip newlines and drop empty lines
-         ua_list = [line.strip() for line in ua_list if line.strip()]
-
-     # if current_platform == 'Linux':
-     #     ua_list = [line for line in ua_list if 'Linux' in line]
-
-     return random.choice(ua_list)
-
-
- def get_proxy_file():
-     # absolute path of this script
-     script_dir = os.path.dirname(os.path.abspath(__file__))
-     # build the absolute path of ip.txt
-     ip_file_txt = os.path.join(script_dir, "ip.txt")
-     with open(ip_file_txt, "r") as f:
-         ips = f.readlines()
-     ip_list = []
-     for ip in ips:
-         ip_list.append(ip.strip())
-     choose_ip = random.choice(ip_list)
-     proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
-     # print(f'Using proxy: {proxies}')
-     return proxies
-
-
- def scrape_and_categorize_proxies(choose_protocol="http"):
-     url = "https://topproxylinks.com/"
-     # fetch the page
-     response = requests.get(url)
-     # parse it with BeautifulSoup
-     soup = BeautifulSoup(response.text, "html.parser")
-
-     # buckets for the proxies of each protocol
-     proxies_dict = {"http": [], "socks4": [], "socks5": []}
-
-     # find all table rows
-     tbody = soup.find("tbody")
-
-     if tbody:
-         for row in tbody.find_all("tr"):
-             # extract the protocol and proxy cells
-             cells = row.find_all("td")
-             protocol = cells[0].text.strip().lower()
-             proxy = cells[1].text.strip()
-
-             # store the proxy under its protocol
-             if protocol in proxies_dict:
-                 proxies_dict[protocol].append(proxy)
-
-     if choose_protocol in proxies_dict:
-         proxies_list = proxies_dict[choose_protocol]
-     else:
-         proxies_list = proxies_dict["http"]
-
-     return proxies_list
-
-
- def get_proxy():
-     ip_list = scrape_and_categorize_proxies(choose_protocol="http")
-     choose_ip = random.choice(ip_list)
-     proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
-     print(f"Using proxy: {proxies}")
-     return proxies
-
-
- def download_file(target_url, store_path, file_name, check=False):
-     # Check if the file exists
-     fname = Path(store_path) / file_name
-     if check:
-         if check_existing_file(fname):
-             count_dict["skip"] += 1
-             return
-     clear_existing_file(fname)
-
-     # -----------------------------------------------
-     print(f"[bold #f0f6d0]Requesting {file_name}...")
-     # create a session
-     s = requests.Session()
-     download_success = False
-     request_times = 0
-
-     def calculate_wait_time(time_str, target_url):
-         import re
-
-         # regex matching YYYYMMDDHH time stamps
-         time_pattern = r"\d{10}"
-
-         # example file names:
-         # str1 = 'HYCOM_water_u_2018010100_2018010112.nc'
-         # str2 = 'HYCOM_water_u_2018010100.nc'
-
-         # find all time stamps in the file name
-         times_in_str = re.findall(time_pattern, time_str)
-
-         # count them
-         num_times_str = len(times_in_str)
-
-         if num_times_str > 1:
-             delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
-             delta_t = delta_t.total_seconds() / 3600
-             delta_t = delta_t / 3 + 1
-         else:
-             delta_t = 1
-         # wait at most ~5 minutes per variable: too short and requests may fail, too long just wastes time
-         num_var = int(target_url.count("var="))
-         if num_var <= 0:
-             num_var = 1
-         return int(delta_t * 5 * 60 * num_var)
-
-     max_timeout = calculate_wait_time(file_name, target_url)
-     print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
-
-     # print(f'Download_start_time: {datetime.datetime.now()}')
-     download_time_s = datetime.datetime.now()
-     order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
-     while not download_success:
-         if request_times >= 10:
-             # print(f'Download failed after {request_times} retries\nSkip it for now and try again later')
-             print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
-             count_dict["fail"] += 1
-             break
-         if request_times > 0:
-             # print(f'\rRetrying, attempt {request_times}', end="")
-             print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
-         # try to download the file
-         try:
-             headers = {"User-Agent": get_ua()}
-             """ response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
-             response.raise_for_status()  # raises HTTPError if the status is not 200
-
-             # save the file
-             with open(filename, 'wb') as f:
-                 f.write(response.content) """
-
-             if find_proxy:
-                 proxies = get_proxy()
-                 response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
-             else:
-                 response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
-             response.raise_for_status()  # raises HTTPError if the status is not 200
-
-             # save the file
-             with open(fname, "wb") as f:
-                 print(f"[bold #96cbd7]Downloading {file_name}...")
-                 for chunk in response.iter_content(chunk_size=1024):
-                     if chunk:
-                         f.write(chunk)
-
-             # print(f'\rFile {fname} downloaded successfully', end="")
-             if os.path.exists(fname):
-                 download_success = True
-                 download_time_e = datetime.datetime.now()
-                 download_delta = download_time_e - download_time_s
-                 print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
-                 count_dict["success"] += 1
-                 # print(f'Download_end_time: {datetime.datetime.now()}')
-
-         except requests.exceptions.HTTPError as errh:
-             print(f"Http Error: {errh}")
-         except requests.exceptions.ConnectionError as errc:
-             print(f"Error Connecting: {errc}")
-         except requests.exceptions.Timeout as errt:
-             print(f"Timeout Error: {errt}")
-         except requests.exceptions.RequestException as err:
-             print(f"Oops, something else: {err}")
-
-         time.sleep(3)
-         request_times += 1
-
-
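To make the timeout formula concrete: a name like HYCOM_water_u_2018010100_2018010121.nc carries two YYYYMMDDHH stamps 21 hours apart, so delta_t = 21 / 3 + 1 = 8, and with a single var= in the URL the cap is 8 * 5 * 60 = 2400 seconds; a single-time file falls back to delta_t = 1, i.e. 300 seconds per variable.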
- def check_hour_is_valid(ymdh_str):
-     # hour should be 00, 03, 06, 09, 12, 15, 18, 21
-     hh = int(str(ymdh_str)[-2:])
-     if hh in [0, 3, 6, 9, 12, 15, 18, 21]:
-         return True
-     else:
-         return False
-
-
- def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
-     if dataset_name is not None and version_name is not None:
-         just_ensure = ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
-         if just_ensure:
-             return dataset_name, version_name
-         else:
-             return None, None
-
-     # make sure the download time is a string
-     download_time_str = str(download_time)
-
-     if len(download_time_str) == 8:
-         download_time_str = download_time_str + "00"
-
-     # validate the hour when a single time is requested
-     if download_time_end is None and not check_hour_is_valid(download_time_str):
-         print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
-         raise ValueError("The hour is invalid")
-
-     # set the time range depending on whether an end time was given
-     if download_time_end is not None:
-         if len(str(download_time_end)) == 8:
-             download_time_end = str(download_time_end) + "21"
-         have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
-         if have_data:
-             return direct_choose_dataset_and_version(download_time_str, download_time_end)
-     else:
-         have_data = check_time_in_dataset_and_version(download_time_str)
-         if have_data:
-             return direct_choose_dataset_and_version(download_time_str)
-
-     return None, None
-
-
- def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
-     # year_str = str(download_time)[:4]
-     ymdh_str = str(download_time)
-     if depth is not None and level_num is not None:
-         print("Please set only one of depth and level_num")
-         print("The depth will be used")
-         which_mode = "depth"
-     elif depth is not None and level_num is None:
-         print(f"Data of single depth (~{depth} m) will be downloaded...")
-         which_mode = "depth"
-     elif level_num is not None and depth is None:
-         print(f"Data of single level ({level_num}) will be downloaded...")
-         which_mode = "level"
-     else:
-         # print("Full depth or full level data will be downloaded...")
-         which_mode = "full"
-     query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
-     submit_url = get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
-     return submit_url
-
-
- def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
-     print("[bold #ecdbfe]-" * 160)
-     download_time = str(download_time)
-     if download_time_end is not None:
-         download_time_end = str(download_time_end)
-         dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time, download_time_end)
-     else:
-         dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
-     if dataset_name is None and version_name is None:
-         count_dict["no_data"] += 1
-         if download_time_end is not None:
-             count_dict["no_data_list"].append(f"{download_time}-{download_time_end}")
-         else:
-             count_dict["no_data_list"].append(download_time)
-         return
-
-     if isinstance(var, str):
-         var = [var]
-
-     if isinstance(var, list):
-         if len(var) == 1:
-             var = var[0]
-             submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
-             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
-             if download_time_end is not None:
-                 file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}_{download_time_end}.nc"
-             download_file(submit_url, store_path, file_name, check)
-         else:
-             varlist = [_ for _ in var]
-             for key, value in var_group.items():
-                 current_group = []
-                 for v in varlist:
-                     if v in value:
-                         current_group.append(v)
-                 if len(current_group) == 0:
-                     continue
-
-                 var = current_group[0]
-                 submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
-                 file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
-                 old_str = f'var={variable_info[var]["var_name"]}'
-                 new_str = f'var={variable_info[var]["var_name"]}'
-                 if len(current_group) > 1:
-                     for v in current_group[1:]:
-                         new_str = f'{new_str}&var={variable_info[v]["var_name"]}'
-                     submit_url = submit_url.replace(old_str, new_str)
-                     # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
-                     file_name = f"HYCOM_{key}_{download_time}.nc"
-                     if download_time_end is not None:
-                         file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
-                 download_file(submit_url, store_path, file_name, check)
-
-
- def convert_full_name_to_short_name(full_name):
-     for var, info in variable_info.items():
-         if full_name == info["var_name"] or full_name == info["standard_name"] or full_name == var:
-             return var
-     print("[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b")
-     print("or")
-     print("[bold blue]water_u, water_v, water_temp, salinity, surf_el, water_u_bottom, water_v_bottom, water_temp_bottom, salinity_bottom")
-     return False
-
-
- def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
-     """
-     Parallel download task. This wrapper is required for parallel downloads;
-     calling direct_download in parallel directly causes problems.
-
-     Task encapsulation: each task bundles the data and operations it needs
-     into one function, so every task is independent and tasks do not
-     interfere with each other. Here download_task wraps a single download
-     together with all of its parameters, so no data is shared or modified
-     across tasks and concurrent execution cannot mix their data up.
-     """
-
-     prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
-
-
- def done_callback(future, progress, task, total, counter_lock):
-     """
-     Callback for parallel download tasks. This is required for parallel
-     downloads; calling direct_download in parallel directly causes problems.
-
-     Callback: invoked whenever a task finishes, so the progress bar is
-     updated immediately instead of only after all tasks have completed.
-     Here done_callback advances the bar by one each time a task finishes,
-     so per-task completion is visible while other tasks are still running.
-     """
-
-     global parallel_counter
-     with counter_lock:
-         parallel_counter += 1
-         progress.update(task, advance=1, description=f"[cyan]Downloading... {parallel_counter}/{total}")
-
-
- def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
-     """
-     Description:
-         Download data for a single time or a series of times
-
-     Parameters:
-         var: str, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-         time_s: str, the start time, such as '2024110100' or '20241101'; if an hour is included, it must be 00, 03, 06, 09, 12, 15, 18 or 21
-         time_e: str, the end time, such as '2024110221' or '20241102'; if an hour is included, it must be 00, 03, 06, 09, 12, 15, 18 or 21
-         lon_min: float, the minimum longitude, default is 0
-         lon_max: float, the maximum longitude, default is 359.92
-         lat_min: float, the minimum latitude, default is -80
-         lat_max: float, the maximum latitude, default is 90
-         depth: float, the depth, default is None
-         level: int, the level number, default is None
-         store_path: str, the path to store the data, default is None
-         dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08'
-         version_name: str, the version name, default is None, example: '53.X', '56.3'
-         num_workers: int, the number of workers, default is None
-         check: bool, whether to skip files that already exist and look complete, default is False
-         ftimes: int, the number of time records per file, default is 1
-
-     Returns:
-         None
-     """
-     ymdh_time_s, ymdh_time_e = str(time_s), str(time_e)
-     if num_workers is not None and num_workers > 1:  # multithreaded download: set up a counter for the progress bar
-         global parallel_counter
-         parallel_counter = 0
-         counter_lock = Lock()  # lock that makes the counter thread-safe
-     if ymdh_time_s == ymdh_time_e:
-         prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name)
-     elif int(ymdh_time_s) < int(ymdh_time_e):
-         print("Downloading a series of files...")
-         time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, "hour")
-         with Progress() as progress:
-             task = progress.add_task("[cyan]Downloading...", total=len(time_list))
-             if ftimes == 1:
-                 if num_workers is None or num_workers <= 1:
-                     # serial mode
-                     for i, time_str in enumerate(time_list):
-                         prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
-                         progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
-                 else:
-                     # parallel mode
-                     with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                         futures = [executor.submit(download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
-                         """ for i, future in enumerate(futures):
-                             future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")) """
-                         for future in as_completed(futures):
-                             done_callback(future, progress, task, len(time_list), counter_lock)
-             else:
-                 new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
-                 total_num = len(new_time_list)
-                 progress.update(task, total=total_num)  # the task was created with the 3-hourly count; correct it for grouped files
-                 if num_workers is None or num_workers <= 1:
-                     # serial mode
-                     for i, time_str in enumerate(new_time_list):
-                         time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
-                         time_str_end = time_list[time_str_end_index]
-                         prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
-                         progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
-                 else:
-                     # parallel mode
-                     with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                         futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
-                         """ for i, future in enumerate(futures):
-                             future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")) """
-                         for future in as_completed(futures):
-                             done_callback(future, progress, task, total_num, counter_lock)
-     else:
-         print("Please ensure the time_s is no more than time_e")
-
-
- def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
-     """
-     Description:
-         Download data for a single time or a series of times
-
-     Parameters:
-         var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-         time_s: str, the start time, such as '2024110100' or '20241101'; if an hour is included, it must be 00, 03, 06, 09, 12, 15, 18 or 21
-         time_e: str, the end time, such as '2024110221' or '20241102'; same hour rule as time_s; default is None, in which case (or when equal to time_s) only a single time is downloaded
-         lon_min: float, the minimum longitude, default is 0
-         lon_max: float, the maximum longitude, default is 359.92
-         lat_min: float, the minimum latitude, default is -80
-         lat_max: float, the maximum latitude, default is 90
-         depth: float, the depth, default is None; set it to download a single depth, suggested range [0, 5000]
-         level: int, the level number, default is None; set it to download a single level, suggested range [1, 40]
-         store_path: str, the path to store the data, default is None; if not set, the data will be stored in the current working directory
-         dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08'; if not set, the dataset will be chosen according to the download time
-         version_name: str, the version name, default is None, example: '53.X', '56.3'; if not set, the version will be chosen according to the download time
-         num_workers: int, the number of workers, default is None; if not set, 1 is used; do not set it too large
-         check: bool, whether to check existing files, default is False; if True, existing complete files are not downloaded again, otherwise they are overwritten
-         ftimes: int, the number of time records per file, default is 1; the maximum is 8, i.e. up to 8 records are downloaded into one file
-
-     Returns:
-         None
-     """
-     # report how the dataset and version will be determined
-     if dataset_name is None and version_name is None:
-         print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
-         print("If you want to choose the dataset and version by yourself, please set the dataset_name and version_name together.")
-     elif dataset_name is None and version_name is not None:
-         print("Please ensure the dataset_name is not None")
-         print("If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.")
-     elif dataset_name is not None and version_name is None:
-         print("Please ensure the version_name is not None")
-         print("If you do not add the version_name, both the dataset and version will be chosen according to the download_time.")
-     else:
-         print("The dataset_name and version_name are both set by yourself.")
-         print("Please ensure the dataset_name and version_name are correct.")
-
-     if isinstance(var, list):
-         if len(var) == 1:
-             var = convert_full_name_to_short_name(var[0])
-         else:
-             var = [convert_full_name_to_short_name(v) for v in var]
-     elif isinstance(var, str):
-         var = convert_full_name_to_short_name(var)
-     else:
-         raise ValueError("The var is invalid")
-     if var is False:
-         raise ValueError("The var is invalid")
-     if lon_min < 0 or lon_min > 359.92 or lon_max < 0 or lon_max > 359.92 or lat_min < -80 or lat_min > 90 or lat_max < -80 or lat_max > 90:
-         print("Please ensure the lon_min, lon_max, lat_min, lat_max are in the range")
-         print("The range of lon_min, lon_max is 0~359.92")
-         print("The range of lat_min, lat_max is -80~90")
-         raise ValueError("The lon or lat is invalid")
-
-     if ftimes != 1:
-         print("Please ensure the ftimes is in [1, 8]")
-         ftimes = max(min(ftimes, 8), 1)
-
-     if store_path is None:
-         store_path = str(Path.cwd())
-     else:
-         os.makedirs(str(store_path), exist_ok=True)
-
-     if num_workers is not None:
-         num_workers = max(min(num_workers, 10), 1)
-
-     time_s = str(time_s)
-     if len(time_s) == 8:
-         time_s += "00"
-     if time_e is None:
-         time_e = time_s[:]
-     else:
-         time_e = str(time_e)
-         if len(time_e) == 8:
-             time_e += "21"
-
-     global count_dict
-     count_dict = {"success": 0, "fail": 0, "skip": 0, "no_data": 0, "total": 0, "no_data_list": []}
-
-     """ global current_platform
-     current_platform = platform.system() """
-
-     global find_proxy
-     find_proxy = False
-
-     download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
-
-     count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
-
-     print("[bold #ecdbfe]-" * 160)
-     print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
-     if count_dict["fail"] > 0:
-         print("[bold #be5528]Please try again to download the failed data later")
-     if count_dict["no_data"] > 0:
-         if count_dict["no_data"] == 1:
-             print(f"[bold #f90000]There is {count_dict['no_data']} time with no data in any dataset and version")
-         else:
-             print(f"[bold #f90000]There are {count_dict['no_data']} times with no data in any dataset and version")
-         for no_data in count_dict["no_data_list"]:
-             print(f"[bold #d81b60]{no_data}")
-     print("[bold #ecdbfe]-" * 160)
-
- def how_to_use():
-     print("""
-     # 1. Choose the dataset and version according to the time:
-     # 1.1 Use function to query
-     You can use the function check_time_in_dataset_and_version(time_input=20241101) to find the dataset and version according to the time.
-     Then, you can see the dataset and version in the output.
-     # 1.2 Draw a picture to see
-     You can draw a picture to see the time range of each dataset and version.
-     Use the function draw_time_range(pic_save_folder=None) to draw the picture.
-
-     # 2. Get the base url according to the dataset, version, var and year:
-     # 2.1 Dataset and version were found in step 1
-     # 2.2 Var: u, v, temp, salt, ssh, u_b, v_b, temp_b, salt_b
-     # 2.3 Year: 1994-2024 (current year)
-
-     # 3. Get the query_dict according to the var, lon_min, lon_max, lat_min, lat_max, depth, level_num, time_str_ymdh:
-     # 3.1 Var: u, v, temp, salt, ssh, u_b, v_b, temp_b, salt_b
-     # 3.2 Lon_min, lon_max, lat_min, lat_max: float
-     # 3.3 Depth: 0-5000 m; if you want single-depth data, set depth
-     # 3.4 Level_num: 1-40; if you want single-level data, set level_num
-     # 3.5 Time_str_ymdh: '2024110112'; the hour is normally 00, 03, 06, 09, 12, 15, 18, 21, except for 1-hourly datasets
-     # 3.6 Use the function to get the query_dict
-     # 3.7 Note: if you want full-depth or full-level data, you needn't set depth or level_num
-
-     # 4. Get the submit url according to the dataset, version, var, year, query_dict:
-     # 4.1 Use the function to get the submit url
-     # 4.2 You can use the submit url to download the data
-
-     # 5. Download the data according to the submit url:
-     # 5.1 Use the function to download the data
-     # 5.2 You can download the data of a single time or a series of times
-     # 5.3 Note: to download a series of data, set ymdh_time_s and ymdh_time_e to different times
-     # 5.4 Note: the time resolution is 3 hours
-
-     # 6. Directly download the data:
-     # 6.1 Use the function to directly download the data
-     # 6.2 You can set the dataset_name and version_name by yourself
-     # 6.3 Note: if you do not set dataset_name and version_name, the dataset and version will be chosen according to the download time
-     # 6.4 Note: if you set dataset_name and version_name, please ensure they are correct
-     # 6.5 Note: if you set only one of dataset_name and version_name, both will be chosen according to the download time
-
-     # 7. Simple use:
-     # 7.1 You can use the function: download(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None)
-     # 7.2 You can download the data of a single time or a series of times
-     # 7.3 The parameters you must set are var, time_s, time_e
-     # 7.4 Example: download('u', '2024110112', '2024110212', lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None)
-     """)
-
-
- if __name__ == "__main__":
-     # help(hycom3h.download)
-     time_s, time_e = "2023010100", "2023123121"
-     merge_name = f"{time_s}_{time_e}"  # name of the merged file
-     root_path = r"G:\Data\HYCOM\3hourly"
-     location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
-     download_dict = {
-         "water_u": {"simple_name": "u", "download": 1},
-         "water_v": {"simple_name": "v", "download": 1},
-         "surf_el": {"simple_name": "ssh", "download": 1},
-         "water_temp": {"simple_name": "temp", "download": 1},
-         "salinity": {"simple_name": "salt", "download": 1},
-         "water_u_bottom": {"simple_name": "u_b", "download": 0},
-         "water_v_bottom": {"simple_name": "v_b", "download": 0},
-         "water_temp_bottom": {"simple_name": "temp_b", "download": 0},
-         "salinity_bottom": {"simple_name": "salt_b", "download": 0},
-     }
-
-     var_list = [var_name for var_name in download_dict.keys() if download_dict[var_name]["download"]]
-
-     # set depth or level; only one may be set
-     # to download all depths or levels, leave both as None
-     depth = None  # or 0-5000 meters
-     level = None  # or 1-40 levels
-     num_workers = 3
-
-     check = True
-     ftimes = 1
-
-     download_switch, single_var = True, False
-     combine_switch = False
-     copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
-
-     if download_switch:
-         if single_var:
-             for var_name in var_list:
-                 download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
-         else:
-             download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
-
-     """ if combine_switch or copy_switch:
-         time_list = get_time_list(time_s, time_e, 3, 'hour')
-         for var_name in var_list:
-             file_list = []
-             if single_var:
-                 for time_str in time_list:
-                     file_list.append(Path(root_path)/f'HYCOM_{var_name}_{time_str}.nc')
-                 merge_path_name = Path(root_path)/f'HYCOM_{var_name}_{merge_name}.nc'
-             else:
-                 # variables were downloaded in groups, so build the file list per group
-                 fname = ''
-                 if var_name in ['water_u', 'water_v', 'water_u_bottom', 'water_v_bottom']:
-                     fname = 'uv3z'
-                 elif var_name in ['water_temp', 'salinity', 'water_temp_bottom', 'salinity_bottom']:
-                     fname = 'ts3z'
-                 elif var_name in ['surf_el']:
-                     fname = 'surf_el'
-                 for time_str in time_list:
-                     file_list.append(Path(root_path)/f'HYCOM_{fname}_{time_str}.nc')
-                 merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
-             if combine_switch:
-                 # var_name here must be the official variable name, not the short form
-                 merge5nc(file_list, var_name, 'time', merge_path_name)
-             if copy_switch:
-                 copy_file(merge_path_name, copy_dir) """