oafuncs 0.0.65__py2.py3-none-any.whl → 0.0.67__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_down/hycom_3hourly.py +222 -312
- oafuncs/oa_down/hycom_3hourly_20241130.py +1232 -0
- oafuncs/oa_tool/email.py +8 -4
- {oafuncs-0.0.65.dist-info → oafuncs-0.0.67.dist-info}/METADATA +1 -1
- {oafuncs-0.0.65.dist-info → oafuncs-0.0.67.dist-info}/RECORD +8 -7
- {oafuncs-0.0.65.dist-info → oafuncs-0.0.67.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.65.dist-info → oafuncs-0.0.67.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.65.dist-info → oafuncs-0.0.67.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -4,13 +4,13 @@
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-11-01 10:31:09
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-11-
+LastEditTime: 2024-11-30 16:06:37
 FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
 Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
 SystemInfo: Windows 11
-Python Version: 3.
+Python Version: 3.12
 '''
 import datetime
 import os
@@ -29,7 +29,7 @@ from rich.progress import Progress
 
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
 
-__all__ = ['draw_time_range', 'download', 'how_to_use', 'get_time_list'
+__all__ = ['draw_time_range', 'download', 'how_to_use', 'get_time_list']
 
 # time resolution
 data_info = {'yearly': {}, 'monthly': {}, 'daily': {}, 'hourly': {}}
@@ -52,7 +52,7 @@ data_info['hourly']['dataset']['GLBy0.08']['version'] = {'93.0': {}}
 # Submitting an out-of-range time on the website returns the dataset's actual time range, which is used to correct the ranges below
 # So far only the GLBv0.08 93.0 time range has been corrected, down to the hour
 # For the other datasets, hours are assumed to run from 00 to 21 for now
-data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['time_range'] = {'time_start': '19940101', 'time_end': '
+data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['time_range'] = {'time_start': '19940101', 'time_end': '20151231'}
 data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['time_range'] = {'time_start': '20140701', 'time_end': '20160430'}
 data_info['hourly']['dataset']['GLBv0.08']['version']['57.2']['time_range'] = {'time_start': '20160501', 'time_end': '20170131'}
 data_info['hourly']['dataset']['GLBv0.08']['version']['92.8']['time_range'] = {'time_start': '20170201', 'time_end': '20170531'}
@@ -229,106 +229,46 @@ def draw_time_range(pic_save_folder=None):
     plt.close()
 
 
-def
-    # old_time = '2023080203'
-    # time_new = '2023-08-02T03%3A00%3A00Z'
-    time_new = f'{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z'
-    return time_new
-
-
-def get_time_list(time_s, time_e, delta_hour):
+def get_time_list(time_s, time_e, delta, interval_type='hour'):
     '''
-    Description: get a list of time strings from time_s to time_e with
+    Description: get a list of time strings from time_s to time_e with a specified interval
     Args:
-        time_s: start time string, e.g. '2023080203'
-        time_e: end time string, e.g. '2023080303'
-
+        time_s: start time string, e.g. '2023080203' for hours or '20230802' for days
+        time_e: end time string, e.g. '2023080303' for hours or '20230803' for days
+        delta: interval of hours or days
+        interval_type: 'hour' for hour interval, 'day' for day interval
     Returns:
        dt_list: a list of time strings
    '''
-
-
-        time_s: start time string, e.g. '2023080203'
-        time_e: end time string, e.g. '2023080303'
-        delta_hour: interval of hours
-    Returns:
-        dt_list: a list of time strings
-    '''
-    dt = datetime.datetime.strptime(time_s, '%Y%m%d%H')
-    dt_list = []
-    while dt.strftime('%Y%m%d%H') <= time_e:
-        dt_list.append(dt.strftime('%Y%m%d%H'))
-        dt = dt + datetime.timedelta(hours=delta_hour)
-    return dt_list
-
+    time_s, time_e = str(time_s), str(time_e)
+    if interval_type == 'hour':
+        time_format = '%Y%m%d%H'
+        delta_type = 'hours'
+    elif interval_type == 'day':
+        time_format = '%Y%m%d'
+        delta_type = 'days'
+        # Ensure time strings are in the correct format for days
+        time_s = time_s[:8]
+        time_e = time_e[:8]
+    else:
+        raise ValueError("interval_type must be 'hour' or 'day'")
 
-
-    '''
-    Description: get a list of time strings from time_s to time_e with delta_hour
-    Args:
-        time_s: start time string, e.g. '20230802'
-        time_e: end time string, e.g. '20230803'
-        delta_hour: interval of hours
-    Returns:
-        dt_list: a list of time strings
-    '''
-    time_s = time_s[:8]
-    time_e = time_e[:8]
-    dt = datetime.datetime.strptime(time_s, '%Y%m%d')
+    dt = datetime.datetime.strptime(time_s, time_format)
     dt_list = []
-    while dt.strftime(
-        dt_list.append(dt.strftime(
-        dt
+    while dt.strftime(time_format) <= time_e:
+        dt_list.append(dt.strftime(time_format))
+        dt += datetime.timedelta(**{delta_type: delta})
     return dt_list
 
 
-def
-
-
-def set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
-    query_dict = {
-        'var': variable_info[var]['var_name'],
-        'north': lat_max,
-        'west': lon_min,
-        'east': lon_max,
-        'south': lat_min,
-        'horizStride': 1,
-        'time': transform_time(time_str_ymdh),
-        'addLatLon': 'true',
-        'accept': 'netcdf4',
-    }
-    return query_dict
-
-
-def set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
-    query_dict = {
-        'var': variable_info[var]['var_name'],
-        'north': lat_max,
-        'west': lon_min,
-        'east': lon_max,
-        'south': lat_min,
-        'horizStride': 1,
-        'time': transform_time(time_str_ymdh),
-        'vertCoord': 0,
-        'addLatLon': 'true',
-        'accept': 'netcdf4',
-    }
-    return query_dict
+def transform_time(time_str):
+    # old_time = '2023080203'
+    # time_new = '2023-08-02T03%3A00%3A00Z'
+    time_new = f'{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z'
+    return time_new
 
 
-def
+def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, mode='single_depth', depth=None, level_num=None):
     query_dict = {
         'var': variable_info[var]['var_name'],
         'north': lat_max,
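Note: the merged get_time_list above replaces the old hour-only helper. As a reading aid, here is a self-contained sketch that mirrors the new logic in this hunk (a paraphrase of the diff, not the packaged code verbatim), followed by the two ways it is called later in this file:

import datetime

def get_time_list(time_s, time_e, delta, interval_type='hour'):
    # Mirrors the merged helper: one loop serves both hourly and daily stepping.
    time_s, time_e = str(time_s), str(time_e)
    if interval_type == 'hour':
        time_format, delta_type = '%Y%m%d%H', 'hours'
    elif interval_type == 'day':
        time_format, delta_type = '%Y%m%d', 'days'
        time_s, time_e = time_s[:8], time_e[:8]
    else:
        raise ValueError("interval_type must be 'hour' or 'day'")
    dt = datetime.datetime.strptime(time_s, time_format)
    dt_list = []
    while dt.strftime(time_format) <= time_e:
        dt_list.append(dt.strftime(time_format))
        dt += datetime.timedelta(**{delta_type: delta})
    return dt_list

print(get_time_list('2024083100', '2024083121', 3, 'hour'))  # eight 3-hourly stamps for one day
print(get_time_list('20240830', '20240901', 1, 'day'))       # ['20240830', '20240831', '20240901']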
@@ -337,64 +277,62 @@ def set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
         'south': lat_min,
         'horizStride': 1,
         'time': transform_time(time_str_ymdh),
-        '
+        'vertCoord': None,
+        'vertStride': None,
         'addLatLon': 'true',
         'accept': 'netcdf4',
     }
-    return query_dict
-
 
-def
-
-
+    def get_nearest_level_index(depth):
+        level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
+        return min(range(len(level_depth)), key=lambda i: abs(level_depth[i]-depth))
+
+    if var not in ['ssh', 'u_b', 'v_b', 'temp_b', 'salt_b'] and var in ['u', 'v', 'temp', 'salt']:
+        if mode == 'depth':
+            if depth < 0 or depth > 5000:
+                print('Please ensure the depth is in the range of 0-5000 m')
+            query_dict['vertCoord'] = get_nearest_level_index(depth) + 1
+        elif mode == 'level':
+            if level_num < 1 or level_num > 40:
+                print('Please ensure the level_num is in the range of 1-40')
+            query_dict['vertCoord'] = max(1, min(level_num, 40))
+        elif mode == 'full':
+            query_dict['vertStride'] = 1
+        else:
+            raise ValueError("Invalid mode. Choose from 'depth', 'level', or 'full'")
 
+    query_dict = {k: v for k, v in query_dict.items() if v is not None}
 
-def get_query_dict_single_level(var, lon_min, lon_max, lat_min, lat_max, level_num, time_str_ymdh):
-    if var in ['ssh']:
-        query_dict = set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
-    else:
-        # level_num: 1-40
-        query_dict = set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
-    if var in ['u', 'v', 'temp', 'salt']:
-        print('Please ensure the level_num is in the range of 1-40')
-        if level_num == 0:
-            level_num = 1
-            print('The level_num is set to 1')
-        if level_num > 40:
-            level_num = 40
-            print('The level_num is set to 40')
-        query_dict['vertCoord'] = level_num
     return query_dict
 
 
-def
-
-    else:
-        query_dict = set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
-    return query_dict
-
+def check_time_in_dataset_and_version(time_input, time_end=None):
+    # Decide whether a single time point or a time range is being handled
+    is_single_time = time_end is None
 
-
-    if
-
+    # For a single time point, initialise the time range from that point
+    if is_single_time:
+        time_start = int(time_input)
+        time_end = time_start
+        time_input_str = str(time_input)
     else:
-
-
+        time_start = int(time_input)
+        time_end = int(time_end)
+        time_input_str = f'{time_input}-{time_end}'
 
+    # Pad the time format according to the string length
+    if len(str(time_start)) == 8:
+        time_start = str(time_start) + '00'
+    if len(str(time_end)) == 8:
+        time_end = str(time_end) + '21'
+    time_start, time_end = int(time_start), int(time_end)
 
-def ymdh_in_which_dataset_and_version(time_ymdh):
-    time_ymdh = int(time_ymdh)
     d_list = []
     v_list = []
     trange_list = []
     have_data = False
+
+    # Iterate over every dataset and version
     for dataset_name in data_info['hourly']['dataset'].keys():
         for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
             time_s, time_e = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
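Two small ideas carry the get_query_dict refactor above: snapping a requested depth to the nearest standard level, and building one query dict whose unused keys are set to None and then stripped. A hedged, self-contained illustration (the level table is abbreviated here; the diff carries the full 40-entry list):

level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 50.0, 100.0, 500.0, 1000.0, 5000.0]

def nearest_level_index(depth):
    # Index of the standard level closest to the requested depth.
    return min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - depth))

print(nearest_level_index(22.0))  # -> 8, i.e. the 20.0 m level in this abbreviated table

# One dict with optional keys, pruned afterwards, replaces the three near-identical
# set_query_dict_* builders that the old version kept.
query_dict = {'var': 'water_temp', 'vertCoord': None, 'vertStride': 1}
query_dict = {k: v for k, v in query_dict.items() if v is not None}
print(query_dict)  # {'var': 'water_temp', 'vertStride': 1}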
@@ -403,63 +341,84 @@ def ymdh_in_which_dataset_and_version(time_ymdh):
                 time_s = time_s + '00'
             if len(time_e) == 8:
                 time_e = time_e + '21'
-
-
-
+            # Check whether the time lies within this dataset's time range
+            if is_single_time:
+                if time_start >= int(time_s) and time_start <= int(time_e):
+                    d_list.append(dataset_name)
+                    v_list.append(version_name)
+                    trange_list.append(f'{time_s}-{time_e}')
+                    have_data = True
+            else:
+                if time_start >= int(time_s) and time_end <= int(time_e):
+                    d_list.append(dataset_name)
+                    v_list.append(version_name)
+                    trange_list.append(f'{time_s}-{time_e}')
+                    have_data = True
+
+    # Print the result
+    print(f'[bold red]{time_input_str} is in the following dataset and version:')
     if have_data:
         for d, v, trange in zip(d_list, v_list, trange_list):
             print(f'[bold blue]{d} {v} {trange}')
+        return True
     else:
-
+        print(f'[bold red]{time_input_str} is not in any dataset and version')
+        return False
 
 
-def
-
-
-            time_s, time_e = str(time_s), str(time_e)
-            if len(time_s) == 8:
-                time_s = time_s + '00'
-            if len(time_e) == 8:
-                time_e = time_e + '21'
-            if time_ymd*100 >= int(time_s) and time_ymd*100+21 <= int(time_e):
-                d_list.append(dataset_name)
-                v_list.append(version_name)
-                trange_list.append(f'{time_s}-{time_e}')
-                have_data = True
-    print(f'[bold red]{time_ymd} is in the following dataset and version:')
-    if have_data:
-        for d, v, trange in zip(d_list, v_list, trange_list):
-            print(f'[bold blue]{d} {v} {trange}')
+def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
+    # Pad the time format according to the string length
+    if len(str(time_input)) == 8:
+        time_input = str(time_input) + '00'
+    time_start = int(time_input)
+    if time_end is not None:
+        if len(str(time_end)) == 8:
+            time_end = str(time_end) + '21'
+        time_end = int(time_end)
     else:
-
+        time_end = time_start
 
+    # Check that the requested dataset and version exist
+    if dataset_name not in data_info['hourly']['dataset']:
+        print(f'[bold red]Dataset {dataset_name} not found.')
+        return False
+    if version_name not in data_info['hourly']['dataset'][dataset_name]['version']:
+        print(f'[bold red]Version {version_name} not found in dataset {dataset_name}.')
+        return False
 
-
-
-
+    # Get the time range of the specified dataset and version
+    time_range = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
+    time_s, time_e = list(time_range.values())
+    time_s, time_e = str(time_s), str(time_e)
+    if len(time_s) == 8:
+        time_s = time_s + '00'
+    if len(time_e) == 8:
+        time_e = time_e + '21'
+    time_s, time_e = int(time_s), int(time_e)
+
+    # Check whether the time lies within that dataset/version's time range
+    if time_start >= time_s and time_end <= time_e:
+        print(f'[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.')
+        return True
+    else:
+        print(f'[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.')
+        return False
+
+
+def direct_choose_dataset_and_version(time_input, time_end=None):
+    # data_info is assumed to be a dict holding the dataset and version information
+    # Example structure: data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
 
+    if len(str(time_input)) == 8:
+        time_input = str(time_input) + '00'
+
+    # If time_end is None, give it the value of time_input
+    if time_end is None:
+        time_end = time_input
+
+    # Normalise the start and end times to the full ymdh format
+    time_start, time_end = int(str(time_input)[:10]), int(str(time_end)[:10])
 
-def direct_choose_dataset_and_version_whole_day(time_ymd):
-    time_ymd = int(str(time_ymd)[:8])
     for dataset_name in data_info['hourly']['dataset'].keys():
         for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
             [time_s, time_e] = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
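The three helpers above split the old ymdh/ymd query functions into a checker, a strict validator, and a chooser. A hedged usage sketch, assuming oafuncs 0.0.67 is installed and the module path matches the FilePath header of this file:

from oafuncs.oa_down import hycom_3hourly as hycom

# Single time point: prints any matching dataset/version and returns True or False.
hycom.check_time_in_dataset_and_version(2018070100)

# Time range: both endpoints must fall inside one dataset/version window.
hycom.check_time_in_dataset_and_version(2018070100, 2018123121)

# Pick the first matching (dataset, version) pair, or (None, None) if nothing matches.
dataset_name, version_name = hycom.direct_choose_dataset_and_version(2018070100)
print(dataset_name, version_name)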
@@ -468,10 +427,16 @@ def direct_choose_dataset_and_version_whole_day(time_ymd):
                 time_s = time_s + '00'
             if len(time_e) == 8:
                 time_e = time_e + '21'
-
+            time_s, time_e = int(time_s), int(time_e)
+
+            # Check whether the time lies within this dataset/version's time range
+            if time_start >= time_s and time_end <= time_e:
                 print(f'[bold purple]dataset: {dataset_name}, version: {version_name} is chosen')
                 return dataset_name, version_name
 
+    # If no matching dataset and version is found, return None
+    return None, None
+
 
 def get_base_url(dataset_name, version_name, var, year_str):
     url_dict = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['url']
@@ -536,8 +501,8 @@ def get_ua():
         "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
         "Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
-        "
-        "
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
+        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
         "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
         "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
         "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
@@ -587,10 +552,10 @@ def get_ua():
         "NOKIA5700/UCWEB7.0.2.37/28/999",
         "Openwave/UCWEB7.0.2.37/28/999",
         "Openwave/UCWEB7.0.2.37/28/999",
+
     ]
-    ua_index = random.randint(0, len(ua_list)-1)
-    ua = ua_list[ua_index]
     # print(f'Using User-Agent: {ua}')
+    ua = random.choice(ua_list)
     return ua
 
 
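The user-agent change above is a pure simplification; random.choice picks an element directly instead of drawing an index first. A trivial sketch with a stand-in list:

import random

ua_list = ["UA-1", "UA-2", "UA-3"]  # stand-in values; the real list holds browser strings

ua = ua_list[random.randint(0, len(ua_list) - 1)]  # old style: index, then lookup
ua = random.choice(ua_list)                        # new style in the diff
print(ua)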
@@ -631,8 +596,7 @@ def dlownload_file(target_url, store_path, file_name, check=False):
             print(f'\r正在重试第 {request_times} 次', end="")
             # Try to download the file
             try:
-                headers = {
-                    'User-Agent': get_ua()}
+                headers = {'User-Agent': get_ua()}
                 response = s.get(target_url, headers=headers, timeout=5)
                 response.raise_for_status()  # Raise an HTTPError if the request does not return 200
 
@@ -668,53 +632,79 @@ def check_hour_is_valid(ymdh_str):
         return False
 
 
-def
-
-
+def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
+    if dataset_name is not None and version_name is not None:
+        just_ensure = ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
+        if just_ensure:
+            return dataset_name, version_name
+        else:
+            return None, None
+
+    # Print information and handle the dataset and version names
     if dataset_name is None and version_name is None:
         print('The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.')
         print('If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.')
-        ymdh_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version(download_time)
     elif dataset_name is None and version_name is not None:
         print('Please ensure the dataset_name is not None')
         print('If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.')
-        ymdh_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version(download_time)
     elif dataset_name is not None and version_name is None:
         print('Please ensure the version_name is not None')
         print('If you do not add the version_name, both the dataset and version will be chosen according to the download_time.')
-        ymdh_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version(download_time)
     else:
         print('The dataset_name and version_name are both set by yourself.')
 
-
+    # Make sure the download time is a string
+    download_time_str = str(download_time)
+
+    if len(download_time_str) == 8:
+        download_time_str = download_time_str + '00'
+
+    # Check whether the hour is valid (when needed)
+    if download_time_end is None and not check_hour_is_valid(download_time_str):
+        print('Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21')
+        raise ValueError('The hour is invalid')
+
+    # Set the time range depending on whether a whole day is being checked
+    if download_time_end is not None:
+        if len(str(download_time_end)) == 8:
+            download_time_end = str(download_time_end) + '21'
+        have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
+        if have_data:
+            return direct_choose_dataset_and_version(download_time_str, download_time_end)
+    else:
+        have_data = check_time_in_dataset_and_version(download_time_str)
+        if have_data:
+            return direct_choose_dataset_and_version(download_time_str)
+
+    return None, None
 
 
 def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time):
     year_str = str(download_time)[:4]
     if depth is not None and level_num is not None:
         print('Please ensure the depth or level_num is None')
-
+        print('Progress will use the depth')
+        which_mode = 'depth'
+    elif depth is not None and level_num is None:
         print(f'Data of single depth ({depth}m) will be downloaded...')
-
-    elif level_num is not None:
+        which_mode = 'depth'
+    elif level_num is not None and depth is None:
         print(f'Data of single level ({level_num}) will be downloaded...')
-
+        which_mode = 'level'
     else:
         print('Full depth or full level data will be downloaded...')
-
+        which_mode = 'full'
+    query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, which_mode, depth, level_num)
     submit_url = get_submit_url(
         dataset_name, version_name, var, year_str, query_dict)
     return submit_url
 
 
-def
+def direct_download_single_hour(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time='2024083100', depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
     download_time = str(download_time)
-    dataset_name, version_name =
+    dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
+    if dataset_name is None and version_name is None:
+        return
 
     if store_path is None:
         store_path = str(Path.cwd())
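check_dataset_version now serves both the single-hour and whole-day paths, so the separate *_whold_day variant removed in the next hunk becomes redundant. A hedged usage sketch, again assuming oafuncs 0.0.67 is installed:

from oafuncs.oa_down import hycom_3hourly as hycom

# Single hour: the hour must be one of 00, 03, ..., 21 or a ValueError is raised.
ds, ver = hycom.check_dataset_version(None, None, '2018070100')

# Whole day: pass an end time padded to hour 21 so the range check is used instead.
ds, ver = hycom.check_dataset_version(None, None, '2018070100', '2018070121')
print(ds, ver)  # (None, None) when the time falls outside every dataset window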
@@ -754,32 +744,11 @@ def direct_download_single_time(var, lon_min=0, lon_max=359.92, lat_min=-80, lat
     dlownload_file(submit_url, store_path, file_name, check)
 
 
-def check_dataset_version_whold_day(dataset_name, version_name, download_time):
-    download_time = str(download_time)
-    if dataset_name is None and version_name is None:
-        print('The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.')
-        print('If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.')
-        ymd_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version_whole_day(download_time)
-    elif dataset_name is None and version_name is not None:
-        print('Please ensure the dataset_name is not None')
-        print('If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.')
-        ymd_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version_whole_day(download_time)
-    elif dataset_name is not None and version_name is None:
-        print('Please ensure the version_name is not None')
-        print('If you do not add the version_name, both the dataset and version will be chosen according to the download_time.')
-        ymd_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version_whole_day(download_time)
-    else:
-        print('The dataset_name and version_name are both set by yourself.')
-
-    return dataset_name, version_name
-
-
 def direct_download_whole_day(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time='20240831', depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
     download_time = str(download_time)[:8]+'00'
-    dataset_name, version_name =
+    dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time, str(download_time)[:8]+'21')
+    if dataset_name is None and version_name is None:
+        return
 
     if store_path is None:
         store_path = str(Path.cwd())
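The padding convention used throughout these hunks: an 8-digit date is promoted to a 10-digit date-hour, with 00 as the first and 21 as the last 3-hourly slot of a day. A tiny illustration with a hypothetical helper name (pad_day is not part of the package):

def pad_day(ymd, end=False):
    # Hypothetical helper, only to illustrate the '00'/'21' convention.
    ymd = str(ymd)
    return ymd + ('21' if end else '00') if len(ymd) == 8 else ymd

print(pad_day(20240831))        # '2024083100'
print(pad_day(20240831, True))  # '2024083121'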
@@ -856,10 +825,10 @@ def download_task(var, time_str, lon_min, lon_max, lat_min, lat_max, depth, leve
     if len(time_str) == 8:
         direct_download_whole_day(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
     else:
-
+        direct_download_single_hour(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
 
 
-def download_single_hour(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
+def download_single_hour(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
     '''
     Description:
     Download the data of single time or a series of time
@@ -901,20 +870,23 @@ def download_single_hour(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
     ymdh_time_s = str(time_s)
     if len(ymdh_time_s) == 8:
         ymdh_time_s += '00'
-
-
-
+    if time_e is None:
+        ymdh_time_e = ymdh_time_s[:]
+    else:
+        ymdh_time_e = str(time_e)
+        if len(ymdh_time_e) == 8:
+            ymdh_time_e += '21'
     if ymdh_time_s == ymdh_time_e:
-
+        direct_download_single_hour(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, depth, level, store_path, dataset_name, version_name)
     elif int(ymdh_time_s) < int(ymdh_time_e):
         print('Downloading a series of files...')
-        time_list =
+        time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, 'hour')
         with Progress() as progress:
             task = progress.add_task("[cyan]Downloading...", total=len(time_list))
             if num_workers is None or num_workers <= 1:
                 # Serial mode
                 for time_str in time_list:
-
+                    direct_download_single_hour(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
                     progress.update(task, advance=1)
             else:
                 # Parallel mode
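The serial/parallel split above is unchanged in shape; only the worker function name changed. A hedged, self-contained sketch of that dispatch, with a dummy task standing in for direct_download_single_hour so it runs offline (rich is already imported by this module):

import time
from concurrent.futures import ThreadPoolExecutor
from rich.progress import Progress

def fake_download(time_str):
    time.sleep(0.1)  # stand-in for one HTTP request

time_list = ['2024083100', '2024083103', '2024083106', '2024083109']
num_workers = 3

with Progress() as progress:
    task = progress.add_task("[cyan]Downloading...", total=len(time_list))
    if num_workers is None or num_workers <= 1:
        for time_str in time_list:  # serial path
            fake_download(time_str)
            progress.update(task, advance=1)
    else:  # parallel path; the package caps this at 10 workers
        with ThreadPoolExecutor(max_workers=min(num_workers, 10)) as executor:
            futures = [executor.submit(fake_download, t) for t in time_list]
            for future in futures:
                future.add_done_callback(lambda _: progress.update(task, advance=1))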
@@ -930,7 +902,7 @@ def download_single_hour(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
         print('Please ensure the time_s is less than the time_e')
 
 
-def download_whole_day(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
+def download_whole_day(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
     '''
     Description:
     Download the data of single time or a series of time
@@ -969,13 +941,17 @@ def download_whole_day(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-
         print('The range of lon_min, lon_max is 0~359.92')
         print('The range of lat_min, lat_max is -80~90')
         raise ValueError('The lon or lat is invalid')
-    time_s
+    time_s = str(time_s)[:8]
+    if time_e is None:
+        time_e = time_s[:]
+    else:
+        time_e = str(time_e)[:8]
 
     if time_s == time_e:
         direct_download_whole_day(var, lon_min, lon_max, lat_min, lat_max, time_s, depth, level, store_path, dataset_name, version_name)
     elif int(time_s) < int(time_e):
         print('Downloading a series of files...')
-        time_list =
+        time_list = get_time_list(time_s, time_e, 1, 'day')
         with Progress() as progress:
             task = progress.add_task("[cyan]Downloading...", total=len(time_list))
             if num_workers is None or num_workers <= 1:
@@ -997,73 +973,7 @@ def download_whole_day(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-
         print('Please ensure the time_s is less than the time_e')
 
 
-def
-    '''
-    Description:
-    Download the data of single time or a series of time
-
-    Parameters:
-    var: str, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-    time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-    time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-    lon_min: float, the minimum longitude, default is 0
-    lon_max: float, the maximum longitude, default is 359.92
-    lat_min: float, the minimum latitude, default is -80
-    lat_max: float, the maximum latitude, default is 90
-    depth: float, the depth, default is None
-    level: int, the level number, default is None
-    store_path: str, the path to store the data, default is None
-    dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08'
-    version_name: str, the version name, default is None, example: '53.X', '56.3'
-    num_workers: int, the number of workers, default is None
-
-    Returns:
-    None
-    '''
-    if isinstance(var, list):
-        var = var[0]
-    var = convert_full_name_to_short_name(var)
-    if var is False:
-        raise ValueError('The var is invalid')
-    if lon_min < 0 or lon_min > 359.92 or lon_max < 0 or lon_max > 359.92 or lat_min < -80 or lat_min > 90 or lat_max < -80 or lat_max > 90:
-        print('Please ensure the lon_min, lon_max, lat_min, lat_max are in the range')
-        print('The range of lon_min, lon_max is 0~359.92')
-        print('The range of lat_min, lat_max is -80~90')
-        raise ValueError('The lon or lat is invalid')
-    ymdh_time_s = str(time_s)
-    if len(ymdh_time_s) == 8:
-        ymdh_time_s += '00'
-    ymdh_time_e = str(time_e)
-    if len(ymdh_time_e) == 8:
-        ymdh_time_e += '21'
-    if ymdh_time_s == ymdh_time_e:
-        direct_download_single_time(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, depth, level, store_path, dataset_name, version_name)
-    elif int(ymdh_time_s) < int(ymdh_time_e):
-        print('Downloading a series of files...')
-        time_list = get_hour_list(ymdh_time_s, ymdh_time_e, 3)
-        with Progress() as progress:
-            task = progress.add_task("[cyan]Downloading...", total=len(time_list))
-            if num_workers is None or num_workers <= 1:
-                # Serial mode
-                for time_str in time_list:
-                    direct_download_single_time(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
-                    progress.update(task, advance=1)
-            else:
-                # Parallel mode
-                if num_workers > 10:
-                    print('The number of workers is too large!')
-                    print('In order to avoid the server being blocked, the number of workers is set to 10')
-                    num_workers = 10
-                with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(download_task, var, time_str, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
-                    for future in futures:
-                        future.add_done_callback(
-                            lambda _: progress.update(task, advance=1))
-    else:
-        print('Please ensure the ymdh_time_s is less than the ymdh_time_e')
-
-
-def download(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, resolution='hour'):
+def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, resolution='hour'):
     '''
     Description:
     Download the data of single time or a series of time
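With the old duplicate implementation removed, download is the only public entry point here and time_e is now optional. A hedged usage sketch, assuming oafuncs 0.0.67 is installed; whether a given time succeeds depends on the dataset windows defined at the top of the module:

from pathlib import Path
from oafuncs.oa_down.hycom_3hourly import download

# One 3-hourly field (time_e omitted): sea-surface height over an East Asian box.
download(var='ssh', time_s='2024083100',
         lon_min=105, lon_max=130, lat_min=15, lat_max=45,
         store_path=Path('./hycom_data'))

# A series of files at hourly resolution, downloaded with two workers.
download(var='u', time_s='2024083100', time_e='2024090121',
         lon_min=105, lon_max=130, lat_min=15, lat_max=45,
         num_workers=2, resolution='hour')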
@@ -1101,7 +1011,7 @@ def how_to_use():
     print('''
     # 1. Choose the dataset and version according to the time:
     # 1.1 Use function to query
-    You can use the function
+    You can use the function check_time_in_dataset_and_version(time_input=20241101) to find the dataset and version according to the time.
     Then, you can see the dataset and version in the output.
     # 1.2 Draw a picture to see
     You can draw a picture to see the time range of each dataset and version.
@@ -1148,8 +1058,8 @@ def how_to_use():
 
 if __name__ == '__main__':
     # help(hycom3h.download)
-    time_s, time_e = '
-    merge_name = '
+    time_s, time_e = '2018070100', '2019123121'
+    merge_name = '2018_2024'
     root_path = r'G:\Data\HYCOM\3hourly'
     location_dict = {'west': 105, 'east': 130, 'south': 15, 'north': 45}
     download_dict = {
@@ -1173,7 +1083,7 @@ if __name__ == '__main__':
     # if you wanna download all depth or level, set both False
     depth = None  # or 0-4000 meters
     level = None  # or 1-40 levels
-    num_workers =
+    num_workers = 1
 
     check = True
 
@@ -1189,7 +1099,7 @@ if __name__ == '__main__':
     download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict['west'], lon_max=location_dict['east'], lat_min=location_dict['south'], lat_max=location_dict['north'], num_workers=num_workers, check=check, depth=depth, level=level)
 
     """ if combine_switch or copy_switch:
-        time_list =
+        time_list = get_time_list(time_s, time_e, 3, 'hour')
         for var_name in var_list:
             file_list = []
             if single_var: