oafuncs 0.0.66__py2.py3-none-any.whl → 0.0.67__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_down/hycom_3hourly.py +213 -314
- oafuncs/oa_down/hycom_3hourly_20241130.py +1232 -0
- {oafuncs-0.0.66.dist-info → oafuncs-0.0.67.dist-info}/METADATA +1 -1
- {oafuncs-0.0.66.dist-info → oafuncs-0.0.67.dist-info}/RECORD +7 -6
- {oafuncs-0.0.66.dist-info → oafuncs-0.0.67.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.66.dist-info → oafuncs-0.0.67.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.66.dist-info → oafuncs-0.0.67.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -4,13 +4,13 @@
|
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-11-01 10:31:09
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-11-30
|
7
|
+
LastEditTime: 2024-11-30 16:06:37
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
|
9
9
|
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
12
12
|
SystemInfo: Windows 11
|
13
|
-
Python Version: 3.
|
13
|
+
Python Version: 3.12
|
14
14
|
'''
|
15
15
|
import datetime
|
16
16
|
import os
|
@@ -29,7 +29,7 @@ from rich.progress import Progress
|
|
29
29
|
|
30
30
|
warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
|
31
31
|
|
32
|
-
__all__ = ['draw_time_range', 'download', 'how_to_use', 'get_time_list'
|
32
|
+
__all__ = ['draw_time_range', 'download', 'how_to_use', 'get_time_list']
|
33
33
|
|
34
34
|
# time resolution
|
35
35
|
data_info = {'yearly': {}, 'monthly': {}, 'daily': {}, 'hourly': {}}
|
@@ -229,106 +229,46 @@ def draw_time_range(pic_save_folder=None):
|
|
229
229
|
plt.close()
|
230
230
|
|
231
231
|
|
232
|
-
def
|
233
|
-
# old_time = '2023080203'
|
234
|
-
# time_new = '2023-08-02T03%3A00%3A00Z'
|
235
|
-
time_new = f'{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z'
|
236
|
-
return time_new
|
237
|
-
|
238
|
-
|
239
|
-
def get_time_list(time_s, time_e, delta_hour):
|
240
|
-
'''
|
241
|
-
Description: get a list of time strings from time_s to time_e with delta_hour
|
242
|
-
Args:
|
243
|
-
time_s: start time string, e.g. '2023080203'
|
244
|
-
time_e: end time string, e.g. '2023080303'
|
245
|
-
delta_hour: interval of hours
|
246
|
-
Returns:
|
247
|
-
dt_list: a list of time strings
|
248
|
-
'''
|
249
|
-
dt = datetime.datetime.strptime(time_s, '%Y%m%d%H')
|
250
|
-
dt_list = []
|
251
|
-
while dt.strftime('%Y%m%d%H') <= time_e:
|
252
|
-
dt_list.append(dt.strftime('%Y%m%d%H'))
|
253
|
-
dt = dt + datetime.timedelta(hours=delta_hour)
|
254
|
-
return dt_list
|
255
|
-
|
256
|
-
|
257
|
-
def get_hour_list(time_s, time_e, delta_hour):
|
232
|
+
def get_time_list(time_s, time_e, delta, interval_type='hour'):
|
258
233
|
'''
|
259
|
-
Description: get a list of time strings from time_s to time_e with
|
234
|
+
Description: get a list of time strings from time_s to time_e with a specified interval
|
260
235
|
Args:
|
261
|
-
time_s: start time string, e.g. '2023080203'
|
262
|
-
time_e: end time string, e.g. '2023080303'
|
263
|
-
|
236
|
+
time_s: start time string, e.g. '2023080203' for hours or '20230802' for days
|
237
|
+
time_e: end time string, e.g. '2023080303' for hours or '20230803' for days
|
238
|
+
delta: interval of hours or days
|
239
|
+
interval_type: 'hour' for hour interval, 'day' for day interval
|
264
240
|
Returns:
|
265
241
|
dt_list: a list of time strings
|
266
242
|
'''
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
243
|
+
time_s, time_e = str(time_s), str(time_e)
|
244
|
+
if interval_type == 'hour':
|
245
|
+
time_format = '%Y%m%d%H'
|
246
|
+
delta_type = 'hours'
|
247
|
+
elif interval_type == 'day':
|
248
|
+
time_format = '%Y%m%d'
|
249
|
+
delta_type = 'days'
|
250
|
+
# Ensure time strings are in the correct format for days
|
251
|
+
time_s = time_s[:8]
|
252
|
+
time_e = time_e[:8]
|
253
|
+
else:
|
254
|
+
raise ValueError("interval_type must be 'hour' or 'day'")
|
274
255
|
|
275
|
-
|
276
|
-
'''
|
277
|
-
Description: get a list of time strings from time_s to time_e with delta_hour
|
278
|
-
Args:
|
279
|
-
time_s: start time string, e.g. '20230802'
|
280
|
-
time_e: end time string, e.g. '20230803'
|
281
|
-
delta_hour: interval of hours
|
282
|
-
Returns:
|
283
|
-
dt_list: a list of time strings
|
284
|
-
'''
|
285
|
-
time_s = time_s[:8]
|
286
|
-
time_e = time_e[:8]
|
287
|
-
dt = datetime.datetime.strptime(time_s, '%Y%m%d')
|
256
|
+
dt = datetime.datetime.strptime(time_s, time_format)
|
288
257
|
dt_list = []
|
289
|
-
while dt.strftime(
|
290
|
-
dt_list.append(dt.strftime(
|
291
|
-
dt
|
258
|
+
while dt.strftime(time_format) <= time_e:
|
259
|
+
dt_list.append(dt.strftime(time_format))
|
260
|
+
dt += datetime.timedelta(**{delta_type: delta})
|
292
261
|
return dt_list
|
293
262
|
|
294
263
|
|
295
|
-
def
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
def set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
|
301
|
-
query_dict = {
|
302
|
-
'var': variable_info[var]['var_name'],
|
303
|
-
'north': lat_max,
|
304
|
-
'west': lon_min,
|
305
|
-
'east': lon_max,
|
306
|
-
'south': lat_min,
|
307
|
-
'horizStride': 1,
|
308
|
-
'time': transform_time(time_str_ymdh),
|
309
|
-
'addLatLon': 'true',
|
310
|
-
'accept': 'netcdf4',
|
311
|
-
}
|
312
|
-
return query_dict
|
313
|
-
|
314
|
-
|
315
|
-
def set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
|
316
|
-
query_dict = {
|
317
|
-
'var': variable_info[var]['var_name'],
|
318
|
-
'north': lat_max,
|
319
|
-
'west': lon_min,
|
320
|
-
'east': lon_max,
|
321
|
-
'south': lat_min,
|
322
|
-
'horizStride': 1,
|
323
|
-
'time': transform_time(time_str_ymdh),
|
324
|
-
'vertCoord': 0,
|
325
|
-
'addLatLon': 'true',
|
326
|
-
'accept': 'netcdf4',
|
327
|
-
}
|
328
|
-
return query_dict
|
264
|
+
def transform_time(time_str):
|
265
|
+
# old_time = '2023080203'
|
266
|
+
# time_new = '2023-08-02T03%3A00%3A00Z'
|
267
|
+
time_new = f'{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z'
|
268
|
+
return time_new
|
329
269
|
|
330
270
|
|
331
|
-
def
|
271
|
+
def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, mode='single_depth', depth=None, level_num=None):
|
332
272
|
query_dict = {
|
333
273
|
'var': variable_info[var]['var_name'],
|
334
274
|
'north': lat_max,
|
@@ -337,64 +277,62 @@ def set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
|
|
337
277
|
'south': lat_min,
|
338
278
|
'horizStride': 1,
|
339
279
|
'time': transform_time(time_str_ymdh),
|
340
|
-
'
|
280
|
+
'vertCoord': None,
|
281
|
+
'vertStride': None,
|
341
282
|
'addLatLon': 'true',
|
342
283
|
'accept': 'netcdf4',
|
343
284
|
}
|
344
|
-
return query_dict
|
345
|
-
|
346
285
|
|
347
|
-
def
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
286
|
+
def get_nearest_level_index(depth):
|
287
|
+
level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
|
288
|
+
return min(range(len(level_depth)), key=lambda i: abs(level_depth[i]-depth))
|
289
|
+
|
290
|
+
if var not in ['ssh', 'u_b', 'v_b', 'temp_b', 'salt_b'] and var in ['u', 'v', 'temp', 'salt']:
|
291
|
+
if mode == 'depth':
|
292
|
+
if depth < 0 or depth > 5000:
|
293
|
+
print('Please ensure the depth is in the range of 0-5000 m')
|
294
|
+
query_dict['vertCoord'] = get_nearest_level_index(depth) + 1
|
295
|
+
elif mode == 'level':
|
296
|
+
if level_num < 1 or level_num > 40:
|
297
|
+
print('Please ensure the level_num is in the range of 1-40')
|
298
|
+
query_dict['vertCoord'] = max(1, min(level_num, 40))
|
299
|
+
elif mode == 'full':
|
300
|
+
query_dict['vertStride'] = 1
|
301
|
+
else:
|
302
|
+
raise ValueError("Invalid mode. Choose from 'depth', 'level', or 'full'")
|
356
303
|
|
304
|
+
query_dict = {k: v for k, v in query_dict.items() if v is not None}
|
357
305
|
|
358
|
-
def get_query_dict_single_level(var, lon_min, lon_max, lat_min, lat_max, level_num, time_str_ymdh):
|
359
|
-
if var in ['ssh']:
|
360
|
-
query_dict = set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
|
361
|
-
else:
|
362
|
-
# level_num: 1-40
|
363
|
-
query_dict = set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
|
364
|
-
if var in ['u', 'v', 'temp', 'salt']:
|
365
|
-
print('Please ensure the level_num is in the range of 1-40')
|
366
|
-
if level_num == 0:
|
367
|
-
level_num = 1
|
368
|
-
print('The level_num is set to 1')
|
369
|
-
if level_num > 40:
|
370
|
-
level_num = 40
|
371
|
-
print('The level_num is set to 40')
|
372
|
-
query_dict['vertCoord'] = level_num
|
373
306
|
return query_dict
|
374
307
|
|
375
308
|
|
376
|
-
def
|
377
|
-
|
378
|
-
|
379
|
-
else:
|
380
|
-
query_dict = set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
|
381
|
-
return query_dict
|
382
|
-
|
309
|
+
def check_time_in_dataset_and_version(time_input, time_end=None):
|
310
|
+
# 判断是处理单个时间点还是时间范围
|
311
|
+
is_single_time = time_end is None
|
383
312
|
|
384
|
-
|
385
|
-
if
|
386
|
-
|
313
|
+
# 如果是单个时间点,初始化时间范围
|
314
|
+
if is_single_time:
|
315
|
+
time_start = int(time_input)
|
316
|
+
time_end = time_start
|
317
|
+
time_input_str = str(time_input)
|
387
318
|
else:
|
388
|
-
|
389
|
-
|
319
|
+
time_start = int(time_input)
|
320
|
+
time_end = int(time_end)
|
321
|
+
time_input_str = f'{time_input}-{time_end}'
|
390
322
|
|
323
|
+
# 根据时间长度补全时间格式
|
324
|
+
if len(str(time_start)) == 8:
|
325
|
+
time_start = str(time_start) + '00'
|
326
|
+
if len(str(time_end)) == 8:
|
327
|
+
time_end = str(time_end) + '21'
|
328
|
+
time_start, time_end = int(time_start), int(time_end)
|
391
329
|
|
392
|
-
def ymdh_in_which_dataset_and_version(time_ymdh):
|
393
|
-
time_ymdh = int(time_ymdh)
|
394
330
|
d_list = []
|
395
331
|
v_list = []
|
396
332
|
trange_list = []
|
397
333
|
have_data = False
|
334
|
+
|
335
|
+
# 遍历数据集和版本
|
398
336
|
for dataset_name in data_info['hourly']['dataset'].keys():
|
399
337
|
for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
|
400
338
|
time_s, time_e = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
|
@@ -403,70 +341,84 @@ def ymdh_in_which_dataset_and_version(time_ymdh):
|
|
403
341
|
time_s = time_s + '00'
|
404
342
|
if len(time_e) == 8:
|
405
343
|
time_e = time_e + '21'
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
344
|
+
# 检查时间是否在数据集的时间范围内
|
345
|
+
if is_single_time:
|
346
|
+
if time_start >= int(time_s) and time_start <= int(time_e):
|
347
|
+
d_list.append(dataset_name)
|
348
|
+
v_list.append(version_name)
|
349
|
+
trange_list.append(f'{time_s}-{time_e}')
|
350
|
+
have_data = True
|
351
|
+
else:
|
352
|
+
if time_start >= int(time_s) and time_end <= int(time_e):
|
353
|
+
d_list.append(dataset_name)
|
354
|
+
v_list.append(version_name)
|
355
|
+
trange_list.append(f'{time_s}-{time_e}')
|
356
|
+
have_data = True
|
357
|
+
|
358
|
+
# 输出结果
|
359
|
+
print(f'[bold red]{time_input_str} is in the following dataset and version:')
|
412
360
|
if have_data:
|
413
361
|
for d, v, trange in zip(d_list, v_list, trange_list):
|
414
362
|
print(f'[bold blue]{d} {v} {trange}')
|
415
363
|
return True
|
416
364
|
else:
|
417
|
-
|
418
|
-
print(f'[bold red]{time_ymdh} is not in any dataset and version')
|
365
|
+
print(f'[bold red]{time_input_str} is not in any dataset and version')
|
419
366
|
return False
|
420
367
|
|
421
368
|
|
422
|
-
def
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
369
|
+
def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
|
370
|
+
# 根据时间长度补全时间格式
|
371
|
+
if len(str(time_input)) == 8:
|
372
|
+
time_input = str(time_input) + '00'
|
373
|
+
time_start = int(time_input)
|
374
|
+
if time_end is not None:
|
375
|
+
if len(str(time_end)) == 8:
|
376
|
+
time_end = str(time_end) + '21'
|
377
|
+
time_end = int(time_end)
|
378
|
+
else:
|
379
|
+
time_end = time_start
|
380
|
+
|
381
|
+
# 检查指定的数据集和版本是否存在
|
382
|
+
if dataset_name not in data_info['hourly']['dataset']:
|
383
|
+
print(f'[bold red]Dataset {dataset_name} not found.')
|
384
|
+
return False
|
385
|
+
if version_name not in data_info['hourly']['dataset'][dataset_name]['version']:
|
386
|
+
print(f'[bold red]Version {version_name} not found in dataset {dataset_name}.')
|
387
|
+
return False
|
388
|
+
|
389
|
+
# 获取指定数据集和版本的时间范围
|
390
|
+
time_range = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
|
391
|
+
time_s, time_e = list(time_range.values())
|
392
|
+
time_s, time_e = str(time_s), str(time_e)
|
393
|
+
if len(time_s) == 8:
|
394
|
+
time_s = time_s + '00'
|
395
|
+
if len(time_e) == 8:
|
396
|
+
time_e = time_e + '21'
|
397
|
+
time_s, time_e = int(time_s), int(time_e)
|
398
|
+
|
399
|
+
# 检查时间是否在指定数据集和版本的时间范围内
|
400
|
+
if time_start >= time_s and time_end <= time_e:
|
401
|
+
print(f'[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.')
|
445
402
|
return True
|
446
403
|
else:
|
447
|
-
|
448
|
-
print(f'[bold red]{time_ymd} is not in any dataset and version')
|
404
|
+
print(f'[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.')
|
449
405
|
return False
|
450
406
|
|
451
407
|
|
452
|
-
def direct_choose_dataset_and_version(
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
if len(time_s) == 8:
|
459
|
-
time_s = time_s + '00'
|
460
|
-
if len(time_e) == 8:
|
461
|
-
time_e = time_e + '21'
|
462
|
-
if time_ymdh >= int(time_s) and time_ymdh <= int(time_e):
|
463
|
-
print(f'[bold purple]dataset: {dataset_name}, version: {version_name} is chosen')
|
464
|
-
return dataset_name, version_name
|
465
|
-
return None, None
|
408
|
+
def direct_choose_dataset_and_version(time_input, time_end=None):
|
409
|
+
# 假设 data_info 是一个字典,包含了数据集和版本的信息
|
410
|
+
# 示例结构:data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
|
411
|
+
|
412
|
+
if len(str(time_input)) == 8:
|
413
|
+
time_input = str(time_input) + '00'
|
466
414
|
|
415
|
+
# 如果 time_end 是 None,则将 time_input 的值赋给它
|
416
|
+
if time_end is None:
|
417
|
+
time_end = time_input
|
418
|
+
|
419
|
+
# 处理开始和结束时间,确保它们是完整的 ymdh 格式
|
420
|
+
time_start, time_end = int(str(time_input)[:10]), int(str(time_end)[:10])
|
467
421
|
|
468
|
-
def direct_choose_dataset_and_version_whole_day(time_ymd):
|
469
|
-
time_ymd = int(str(time_ymd)[:8])
|
470
422
|
for dataset_name in data_info['hourly']['dataset'].keys():
|
471
423
|
for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
|
472
424
|
[time_s, time_e] = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
|
@@ -475,10 +427,16 @@ def direct_choose_dataset_and_version_whole_day(time_ymd):
|
|
475
427
|
time_s = time_s + '00'
|
476
428
|
if len(time_e) == 8:
|
477
429
|
time_e = time_e + '21'
|
478
|
-
|
430
|
+
time_s, time_e = int(time_s), int(time_e)
|
431
|
+
|
432
|
+
# 检查时间是否在数据集版本的时间范围内
|
433
|
+
if time_start >= time_s and time_end <= time_e:
|
479
434
|
print(f'[bold purple]dataset: {dataset_name}, version: {version_name} is chosen')
|
480
435
|
return dataset_name, version_name
|
481
436
|
|
437
|
+
# 如果没有找到匹配的数据集和版本,返回 None
|
438
|
+
return None, None
|
439
|
+
|
482
440
|
|
483
441
|
def get_base_url(dataset_name, version_name, var, year_str):
|
484
442
|
url_dict = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['url']
|
@@ -543,8 +501,8 @@ def get_ua():
|
|
543
501
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
544
502
|
"Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
545
503
|
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
|
546
|
-
"
|
547
|
-
"
|
504
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
|
505
|
+
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
548
506
|
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
549
507
|
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
550
508
|
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
@@ -594,10 +552,10 @@ def get_ua():
|
|
594
552
|
"NOKIA5700/UCWEB7.0.2.37/28/999",
|
595
553
|
"Openwave/UCWEB7.0.2.37/28/999",
|
596
554
|
"Openwave/UCWEB7.0.2.37/28/999",
|
555
|
+
|
597
556
|
]
|
598
|
-
ua_index = random.randint(0, len(ua_list)-1)
|
599
|
-
ua = ua_list[ua_index]
|
600
557
|
# print(f'Using User-Agent: {ua}')
|
558
|
+
ua = random.choice(ua_list)
|
601
559
|
return ua
|
602
560
|
|
603
561
|
|
@@ -638,8 +596,7 @@ def dlownload_file(target_url, store_path, file_name, check=False):
|
|
638
596
|
print(f'\r正在重试第 {request_times} 次', end="")
|
639
597
|
# 尝试下载文件
|
640
598
|
try:
|
641
|
-
headers = {
|
642
|
-
'User-Agent': get_ua()}
|
599
|
+
headers = {'User-Agent': get_ua()}
|
643
600
|
response = s.get(target_url, headers=headers, timeout=5)
|
644
601
|
response.raise_for_status() # 如果请求返回的不是200,将抛出HTTPError异常
|
645
602
|
|
@@ -675,53 +632,77 @@ def check_hour_is_valid(ymdh_str):
|
|
675
632
|
return False
|
676
633
|
|
677
634
|
|
678
|
-
def
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
635
|
+
def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
|
636
|
+
if dataset_name is not None and version_name is not None:
|
637
|
+
just_ensure = ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
|
638
|
+
if just_ensure:
|
639
|
+
return dataset_name, version_name
|
640
|
+
else:
|
641
|
+
return None, None
|
642
|
+
|
643
|
+
# 打印信息并处理数据集和版本名称
|
686
644
|
if dataset_name is None and version_name is None:
|
687
645
|
print('The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.')
|
688
646
|
print('If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.')
|
689
|
-
dataset_name, version_name = direct_choose_dataset_and_version(download_time)
|
690
647
|
elif dataset_name is None and version_name is not None:
|
691
648
|
print('Please ensure the dataset_name is not None')
|
692
649
|
print('If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.')
|
693
|
-
dataset_name, version_name = direct_choose_dataset_and_version(download_time)
|
694
650
|
elif dataset_name is not None and version_name is None:
|
695
651
|
print('Please ensure the version_name is not None')
|
696
652
|
print('If you do not add the version_name, both the dataset and version will be chosen according to the download_time.')
|
697
|
-
dataset_name, version_name = direct_choose_dataset_and_version(download_time)
|
698
653
|
else:
|
699
654
|
print('The dataset_name and version_name are both set by yourself.')
|
700
655
|
|
701
|
-
|
656
|
+
# 确保下载时间是一个字符串
|
657
|
+
download_time_str = str(download_time)
|
658
|
+
|
659
|
+
if len(download_time_str) == 8:
|
660
|
+
download_time_str = download_time_str + '00'
|
661
|
+
|
662
|
+
# 检查小时是否有效(如果需要的话)
|
663
|
+
if download_time_end is None and not check_hour_is_valid(download_time_str):
|
664
|
+
print('Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21')
|
665
|
+
raise ValueError('The hour is invalid')
|
666
|
+
|
667
|
+
# 根据是否检查整个天来设置时间范围
|
668
|
+
if download_time_end is not None:
|
669
|
+
if len(str(download_time_end)) == 8:
|
670
|
+
download_time_end = str(download_time_end) + '21'
|
671
|
+
have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
|
672
|
+
if have_data:
|
673
|
+
return direct_choose_dataset_and_version(download_time_str, download_time_end)
|
674
|
+
else:
|
675
|
+
have_data = check_time_in_dataset_and_version(download_time_str)
|
676
|
+
if have_data:
|
677
|
+
return direct_choose_dataset_and_version(download_time_str)
|
678
|
+
|
679
|
+
return None, None
|
702
680
|
|
703
681
|
|
704
682
|
def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time):
|
705
683
|
year_str = str(download_time)[:4]
|
706
684
|
if depth is not None and level_num is not None:
|
707
685
|
print('Please ensure the depth or level_num is None')
|
708
|
-
|
686
|
+
print('Progress will use the depth')
|
687
|
+
which_mode = 'depth'
|
688
|
+
elif depth is not None and level_num is None:
|
709
689
|
print(f'Data of single depth ({depth}m) will be downloaded...')
|
710
|
-
|
711
|
-
elif level_num is not None:
|
690
|
+
which_mode = 'depth'
|
691
|
+
elif level_num is not None and depth is None:
|
712
692
|
print(f'Data of single level ({level_num}) will be downloaded...')
|
713
|
-
|
693
|
+
which_mode = 'level'
|
714
694
|
else:
|
715
695
|
print('Full depth or full level data will be downloaded...')
|
716
|
-
|
696
|
+
which_mode = 'full'
|
697
|
+
query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, which_mode, depth, level_num)
|
717
698
|
submit_url = get_submit_url(
|
718
699
|
dataset_name, version_name, var, year_str, query_dict)
|
719
700
|
return submit_url
|
720
701
|
|
721
702
|
|
722
|
-
def
|
703
|
+
def direct_download_single_hour(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time='2024083100', depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
|
723
704
|
download_time = str(download_time)
|
724
|
-
dataset_name, version_name =
|
705
|
+
dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
|
725
706
|
if dataset_name is None and version_name is None:
|
726
707
|
return
|
727
708
|
|
@@ -763,32 +744,9 @@ def direct_download_single_time(var, lon_min=0, lon_max=359.92, lat_min=-80, lat
|
|
763
744
|
dlownload_file(submit_url, store_path, file_name, check)
|
764
745
|
|
765
746
|
|
766
|
-
def check_dataset_version_whold_day(dataset_name, version_name, download_time):
|
767
|
-
download_time = str(download_time)
|
768
|
-
have_data = ymd_in_which_dataset_and_version(download_time)
|
769
|
-
if not have_data:
|
770
|
-
return None, None
|
771
|
-
if dataset_name is None and version_name is None:
|
772
|
-
print('The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.')
|
773
|
-
print('If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.')
|
774
|
-
dataset_name, version_name = direct_choose_dataset_and_version_whole_day(download_time)
|
775
|
-
elif dataset_name is None and version_name is not None:
|
776
|
-
print('Please ensure the dataset_name is not None')
|
777
|
-
print('If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.')
|
778
|
-
dataset_name, version_name = direct_choose_dataset_and_version_whole_day(download_time)
|
779
|
-
elif dataset_name is not None and version_name is None:
|
780
|
-
print('Please ensure the version_name is not None')
|
781
|
-
print('If you do not add the version_name, both the dataset and version will be chosen according to the download_time.')
|
782
|
-
dataset_name, version_name = direct_choose_dataset_and_version_whole_day(download_time)
|
783
|
-
else:
|
784
|
-
print('The dataset_name and version_name are both set by yourself.')
|
785
|
-
|
786
|
-
return dataset_name, version_name
|
787
|
-
|
788
|
-
|
789
747
|
def direct_download_whole_day(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time='20240831', depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
|
790
748
|
download_time = str(download_time)[:8]+'00'
|
791
|
-
dataset_name, version_name =
|
749
|
+
dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time, str(download_time)[:8]+'21')
|
792
750
|
if dataset_name is None and version_name is None:
|
793
751
|
return
|
794
752
|
|
@@ -867,10 +825,10 @@ def download_task(var, time_str, lon_min, lon_max, lat_min, lat_max, depth, leve
|
|
867
825
|
if len(time_str) == 8:
|
868
826
|
direct_download_whole_day(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
|
869
827
|
else:
|
870
|
-
|
828
|
+
direct_download_single_hour(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
|
871
829
|
|
872
830
|
|
873
|
-
def download_single_hour(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
|
831
|
+
def download_single_hour(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
|
874
832
|
'''
|
875
833
|
Description:
|
876
834
|
Download the data of single time or a series of time
|
@@ -912,20 +870,23 @@ def download_single_hour(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
|
|
912
870
|
ymdh_time_s = str(time_s)
|
913
871
|
if len(ymdh_time_s) == 8:
|
914
872
|
ymdh_time_s += '00'
|
915
|
-
|
916
|
-
|
917
|
-
|
873
|
+
if time_e is None:
|
874
|
+
ymdh_time_e = ymdh_time_s[:]
|
875
|
+
else:
|
876
|
+
ymdh_time_e = str(time_e)
|
877
|
+
if len(ymdh_time_e) == 8:
|
878
|
+
ymdh_time_e += '21'
|
918
879
|
if ymdh_time_s == ymdh_time_e:
|
919
|
-
|
880
|
+
direct_download_single_hour(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, depth, level, store_path, dataset_name, version_name)
|
920
881
|
elif int(ymdh_time_s) < int(ymdh_time_e):
|
921
882
|
print('Downloading a series of files...')
|
922
|
-
time_list =
|
883
|
+
time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, 'hour')
|
923
884
|
with Progress() as progress:
|
924
885
|
task = progress.add_task("[cyan]Downloading...", total=len(time_list))
|
925
886
|
if num_workers is None or num_workers <= 1:
|
926
887
|
# 串行方式
|
927
888
|
for time_str in time_list:
|
928
|
-
|
889
|
+
direct_download_single_hour(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
|
929
890
|
progress.update(task, advance=1)
|
930
891
|
else:
|
931
892
|
# 并行方式
|
@@ -941,7 +902,7 @@ def download_single_hour(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
|
|
941
902
|
print('Please ensure the time_s is less than the time_e')
|
942
903
|
|
943
904
|
|
944
|
-
def download_whole_day(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
|
905
|
+
def download_whole_day(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
|
945
906
|
'''
|
946
907
|
Description:
|
947
908
|
Download the data of single time or a series of time
|
@@ -980,13 +941,17 @@ def download_whole_day(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-
|
|
980
941
|
print('The range of lon_min, lon_max is 0~359.92')
|
981
942
|
print('The range of lat_min, lat_max is -80~90')
|
982
943
|
raise ValueError('The lon or lat is invalid')
|
983
|
-
time_s
|
944
|
+
time_s = str(time_s)[:8]
|
945
|
+
if time_e is None:
|
946
|
+
time_e = time_s[:]
|
947
|
+
else:
|
948
|
+
time_e = str(time_e)[:8]
|
984
949
|
|
985
950
|
if time_s == time_e:
|
986
951
|
direct_download_whole_day(var, lon_min, lon_max, lat_min, lat_max, time_s, depth, level, store_path, dataset_name, version_name)
|
987
952
|
elif int(time_s) < int(time_e):
|
988
953
|
print('Downloading a series of files...')
|
989
|
-
time_list =
|
954
|
+
time_list = get_time_list(time_s, time_e, 1, 'day')
|
990
955
|
with Progress() as progress:
|
991
956
|
task = progress.add_task("[cyan]Downloading...", total=len(time_list))
|
992
957
|
if num_workers is None or num_workers <= 1:
|
@@ -1008,73 +973,7 @@ def download_whole_day(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-
|
|
1008
973
|
print('Please ensure the time_s is less than the time_e')
|
1009
974
|
|
1010
975
|
|
1011
|
-
def
|
1012
|
-
'''
|
1013
|
-
Description:
|
1014
|
-
Download the data of single time or a series of time
|
1015
|
-
|
1016
|
-
Parameters:
|
1017
|
-
var: str, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
|
1018
|
-
time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
|
1019
|
-
time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
|
1020
|
-
lon_min: float, the minimum longitude, default is 0
|
1021
|
-
lon_max: float, the maximum longitude, default is 359.92
|
1022
|
-
lat_min: float, the minimum latitude, default is -80
|
1023
|
-
lat_max: float, the maximum latitude, default is 90
|
1024
|
-
depth: float, the depth, default is None
|
1025
|
-
level: int, the level number, default is None
|
1026
|
-
store_path: str, the path to store the data, default is None
|
1027
|
-
dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08'
|
1028
|
-
version_name: str, the version name, default is None, example: '53.X', '56.3'
|
1029
|
-
num_workers: int, the number of workers, default is None
|
1030
|
-
|
1031
|
-
Returns:
|
1032
|
-
None
|
1033
|
-
'''
|
1034
|
-
if isinstance(var, list):
|
1035
|
-
var = var[0]
|
1036
|
-
var = convert_full_name_to_short_name(var)
|
1037
|
-
if var is False:
|
1038
|
-
raise ValueError('The var is invalid')
|
1039
|
-
if lon_min < 0 or lon_min > 359.92 or lon_max < 0 or lon_max > 359.92 or lat_min < -80 or lat_min > 90 or lat_max < -80 or lat_max > 90:
|
1040
|
-
print('Please ensure the lon_min, lon_max, lat_min, lat_max are in the range')
|
1041
|
-
print('The range of lon_min, lon_max is 0~359.92')
|
1042
|
-
print('The range of lat_min, lat_max is -80~90')
|
1043
|
-
raise ValueError('The lon or lat is invalid')
|
1044
|
-
ymdh_time_s = str(time_s)
|
1045
|
-
if len(ymdh_time_s) == 8:
|
1046
|
-
ymdh_time_s += '00'
|
1047
|
-
ymdh_time_e = str(time_e)
|
1048
|
-
if len(ymdh_time_e) == 8:
|
1049
|
-
ymdh_time_e += '21'
|
1050
|
-
if ymdh_time_s == ymdh_time_e:
|
1051
|
-
direct_download_single_time(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, depth, level, store_path, dataset_name, version_name)
|
1052
|
-
elif int(ymdh_time_s) < int(ymdh_time_e):
|
1053
|
-
print('Downloading a series of files...')
|
1054
|
-
time_list = get_hour_list(ymdh_time_s, ymdh_time_e, 3)
|
1055
|
-
with Progress() as progress:
|
1056
|
-
task = progress.add_task("[cyan]Downloading...", total=len(time_list))
|
1057
|
-
if num_workers is None or num_workers <= 1:
|
1058
|
-
# 串行方式
|
1059
|
-
for time_str in time_list:
|
1060
|
-
direct_download_single_time(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
|
1061
|
-
progress.update(task, advance=1)
|
1062
|
-
else:
|
1063
|
-
# 并行方式
|
1064
|
-
if num_workers > 10:
|
1065
|
-
print('The number of workers is too large!')
|
1066
|
-
print('In order to avoid the server being blocked, the number of workers is set to 10')
|
1067
|
-
num_workers = 10
|
1068
|
-
with ThreadPoolExecutor(max_workers=num_workers) as executor:
|
1069
|
-
futures = [executor.submit(download_task, var, time_str, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
|
1070
|
-
for future in futures:
|
1071
|
-
future.add_done_callback(
|
1072
|
-
lambda _: progress.update(task, advance=1))
|
1073
|
-
else:
|
1074
|
-
print('Please ensure the ymdh_time_s is less than the ymdh_time_e')
|
1075
|
-
|
1076
|
-
|
1077
|
-
def download(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, resolution='hour'):
|
976
|
+
def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, resolution='hour'):
|
1078
977
|
'''
|
1079
978
|
Description:
|
1080
979
|
Download the data of single time or a series of time
|
@@ -1112,7 +1011,7 @@ def how_to_use():
|
|
1112
1011
|
print('''
|
1113
1012
|
# 1. Choose the dataset and version according to the time:
|
1114
1013
|
# 1.1 Use function to query
|
1115
|
-
You can use the function
|
1014
|
+
You can use the function check_time_in_dataset_and_version(time_input=20241101) to find the dataset and version according to the time.
|
1116
1015
|
Then, you can see the dataset and version in the output.
|
1117
1016
|
# 1.2 Draw a picture to see
|
1118
1017
|
You can draw a picture to see the time range of each dataset and version.
|
@@ -1159,8 +1058,8 @@ def how_to_use():
|
|
1159
1058
|
|
1160
1059
|
if __name__ == '__main__':
|
1161
1060
|
# help(hycom3h.download)
|
1162
|
-
time_s, time_e = '
|
1163
|
-
merge_name = '
|
1061
|
+
time_s, time_e = '2018070100', '2019123121'
|
1062
|
+
merge_name = '2018_2024'
|
1164
1063
|
root_path = r'G:\Data\HYCOM\3hourly'
|
1165
1064
|
location_dict = {'west': 105, 'east': 130, 'south': 15, 'north': 45}
|
1166
1065
|
download_dict = {
|
@@ -1184,7 +1083,7 @@ if __name__ == '__main__':
|
|
1184
1083
|
# if you wanna download all depth or level, set both False
|
1185
1084
|
depth = None # or 0-4000 meters
|
1186
1085
|
level = None # or 1-40 levels
|
1187
|
-
num_workers =
|
1086
|
+
num_workers = 1
|
1188
1087
|
|
1189
1088
|
check = True
|
1190
1089
|
|
@@ -1200,7 +1099,7 @@ if __name__ == '__main__':
|
|
1200
1099
|
download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict['west'], lon_max=location_dict['east'], lat_min=location_dict['south'], lat_max=location_dict['north'], num_workers=num_workers, check=check, depth=depth, level=level)
|
1201
1100
|
|
1202
1101
|
""" if combine_switch or copy_switch:
|
1203
|
-
time_list =
|
1102
|
+
time_list = get_time_list(time_s, time_e, 3, 'hour')
|
1204
1103
|
for var_name in var_list:
|
1205
1104
|
file_list = []
|
1206
1105
|
if single_var:
|