oafuncs 0.0.77__py2.py3-none-any.whl → 0.0.79__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,50 +1,53 @@
1
1
  #!/usr/bin/env python
2
2
  # coding=utf-8
3
- '''
3
+ """
4
4
  Author: Liu Kun && 16031215@qq.com
5
5
  Date: 2024-11-01 10:31:09
6
6
  LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2024-12-01 19:10:10
7
+ LastEditTime: 2024-12-08 10:20:45
8
8
  FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
9
9
  Description:
10
10
  EditPlatform: vscode
11
11
  ComputerInfo: XPS 15 9510
12
12
  SystemInfo: Windows 11
13
13
  Python Version: 3.12
14
- '''
14
+ """
15
+
15
16
  import datetime
16
17
  import os
17
18
  import random
18
19
  import time
19
20
  import warnings
20
- from concurrent.futures import ThreadPoolExecutor
21
+ from concurrent.futures import ThreadPoolExecutor, as_completed
21
22
  from pathlib import Path
23
+ from threading import Lock
22
24
 
23
25
  import matplotlib.pyplot as plt
24
26
  import numpy as np
25
27
  import pandas as pd
26
28
  import requests
29
+ from bs4 import BeautifulSoup
27
30
  from rich import print
28
31
  from rich.progress import Progress
29
32
 
30
33
  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
31
34
 
32
- __all__ = ['draw_time_range', 'download', 'how_to_use', 'get_time_list', 'get_ua']
35
+ __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list", "get_ua"]
33
36
 
34
37
  # time resolution
35
- data_info = {'yearly': {}, 'monthly': {}, 'daily': {}, 'hourly': {}}
38
+ data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
36
39
 
37
40
  # hourly data
38
41
  # dataset: GLBv0.08, GLBu0.08, GLBy0.08
39
- data_info['hourly']['dataset'] = {'GLBv0.08': {}, 'GLBu0.08': {}, 'GLBy0.08': {}}
42
+ data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}}
40
43
 
41
44
  # version
42
45
  # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
43
- data_info['hourly']['dataset']['GLBv0.08']['version'] = {'53.X': {}, '56.3': {}, '57.2': {}, '92.8': {}, '57.7': {}, '92.9': {}, '93.0': {}}
46
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
44
47
  # version of GLBu0.08: 93.0
45
- data_info['hourly']['dataset']['GLBu0.08']['version'] = {'93.0': {}}
48
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
46
49
  # version of GLBy0.08: 93.0
47
- data_info['hourly']['dataset']['GLBy0.08']['version'] = {'93.0': {}}
50
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
48
51
 
49
52
  # info details
50
53
  # time range
@@ -52,29 +55,29 @@ data_info['hourly']['dataset']['GLBy0.08']['version'] = {'93.0': {}}
52
55
  # Submitting an out-of-range time on the website returns the dataset's actual time range, which was used to correct the ranges below
53
56
  # So far only the GLBv0.08 93.0 time range has been corrected, down to the hour
54
57
  # For the other datasets the hours default to starting at 00 and ending at 21 for now
55
- data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['time_range'] = {'time_start': '19940101', 'time_end': '20151231'}
56
- data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['time_range'] = {'time_start': '20140701', 'time_end': '20160430'}
57
- data_info['hourly']['dataset']['GLBv0.08']['version']['57.2']['time_range'] = {'time_start': '20160501', 'time_end': '20170131'}
58
- data_info['hourly']['dataset']['GLBv0.08']['version']['92.8']['time_range'] = {'time_start': '20170201', 'time_end': '20170531'}
59
- data_info['hourly']['dataset']['GLBv0.08']['version']['57.7']['time_range'] = {'time_start': '20170601', 'time_end': '20170930'}
60
- data_info['hourly']['dataset']['GLBv0.08']['version']['92.9']['time_range'] = {'time_start': '20171001', 'time_end': '20171231'}
61
- data_info['hourly']['dataset']['GLBv0.08']['version']['93.0']['time_range'] = {'time_start': '2018010112', 'time_end': '2020021909'}
58
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "19940101", "time_end": "20151231"}
59
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "20140701", "time_end": "20160430"}
60
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "20160501", "time_end": "20170131"}
61
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "20170201", "time_end": "20170531"}
62
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "20170601", "time_end": "20170930"}
63
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "20171001", "time_end": "20171231"}
64
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
62
65
  # GLBu0.08
63
- data_info['hourly']['dataset']['GLBu0.08']['version']['93.0']['time_range'] = {'time_start': '20180919', 'time_end': '20181208'}
66
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "20180919", "time_end": "20181208"}
64
67
  # GLBy0.08
65
- data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['time_range'] = {'time_start': '20181204', 'time_end': '20300904'}
68
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "20181204", "time_end": "20300904"}
66
69
 
67
70
  # variable
68
71
  variable_info = {
69
- 'u': {'var_name': 'water_u', 'standard_name': 'eastward_sea_water_velocity'},
70
- 'v': {'var_name': 'water_v', 'standard_name': 'northward_sea_water_velocity'},
71
- 'temp': {'var_name': 'water_temp', 'standard_name': 'sea_water_potential_temperature'},
72
- 'salt': {'var_name': 'salinity', 'standard_name': 'sea_water_salinity'},
73
- 'ssh': {'var_name': 'surf_el', 'standard_name': 'sea_surface_elevation'},
74
- 'u_b': {'var_name': 'water_u_bottom', 'standard_name': 'eastward_sea_water_velocity_at_sea_floor'},
75
- 'v_b': {'var_name': 'water_v_bottom', 'standard_name': 'northward_sea_water_velocity_at_sea_floor'},
76
- 'temp_b': {'var_name': 'water_temp_bottom', 'standard_name': 'sea_water_potential_temperature_at_sea_floor'},
77
- 'salt_b': {'var_name': 'salinity_bottom', 'standard_name': 'sea_water_salinity_at_sea_floor'},
72
+ "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
73
+ "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
74
+ "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
75
+ "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
76
+ "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
77
+ "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
78
+ "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
79
+ "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
80
+ "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
78
81
  }
79
82
 
80
83
  # classification method
@@ -82,15 +85,15 @@ variable_info = {
82
85
  # same_path: the data of different years is stored in the same file
83
86
  # var_different: the data of different variables is stored in different files
84
87
  # var_year_different: the data of different variables and years is stored in different files
85
- data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['classification'] = 'year_different'
86
- data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['classification'] = 'same_path'
87
- data_info['hourly']['dataset']['GLBv0.08']['version']['57.2']['classification'] = 'same_path'
88
- data_info['hourly']['dataset']['GLBv0.08']['version']['92.8']['classification'] = 'var_different'
89
- data_info['hourly']['dataset']['GLBv0.08']['version']['57.7']['classification'] = 'same_path'
90
- data_info['hourly']['dataset']['GLBv0.08']['version']['92.9']['classification'] = 'var_different'
91
- data_info['hourly']['dataset']['GLBv0.08']['version']['93.0']['classification'] = 'var_different'
92
- data_info['hourly']['dataset']['GLBu0.08']['version']['93.0']['classification'] = 'var_different'
93
- data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['classification'] = 'var_year_different'
88
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
89
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
90
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
91
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
92
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
93
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
94
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
95
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
96
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
94
97
 
95
98
  # download info
96
99
  # base url
@@ -98,64 +101,61 @@ data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['classification']
98
101
  url_53x = {}
99
102
  for y_53x in range(1994, 2016):
100
103
  # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
101
- url_53x[str(y_53x)] = rf'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?'
102
- data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['url'] = url_53x
104
+ url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
105
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
103
106
  # GLBv0.08 56.3
104
- data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['url'] = r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?'
107
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
105
108
  # GLBv0.08 57.2
106
- data_info['hourly']['dataset']['GLBv0.08']['version']['57.2']['url'] = r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?'
109
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
107
110
  # GLBv0.08 92.8
108
111
  url_928 = {
109
- 'uv3z': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?',
110
- 'ts3z': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?',
111
- 'ssh': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?',
112
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
113
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
114
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
112
115
  }
113
- data_info['hourly']['dataset']['GLBv0.08']['version']['92.8']['url'] = url_928
116
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
114
117
  # GLBv0.08 57.7
115
- data_info['hourly']['dataset']['GLBv0.08']['version']['57.7']['url'] = r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?'
118
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
116
119
  # GLBv0.08 92.9
117
120
  url_929 = {
118
- 'uv3z': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?',
119
- 'ts3z': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?',
120
- 'ssh': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?',
121
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
122
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
123
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
121
124
  }
122
- data_info['hourly']['dataset']['GLBv0.08']['version']['92.9']['url'] = url_929
125
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
123
126
  # GLBv0.08 93.0
124
127
  url_930_v = {
125
- 'uv3z': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?',
126
- 'ts3z': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?',
127
- 'ssh': r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?',
128
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
129
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
130
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
128
131
  }
129
- data_info['hourly']['dataset']['GLBv0.08']['version']['93.0']['url'] = url_930_v
132
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
130
133
  # GLBu0.08 93.0
131
134
  url_930_u = {
132
- 'uv3z': r'https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?',
133
- 'ts3z': r'https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?',
134
- 'ssh': r'https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?',
135
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
136
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
137
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
135
138
  }
136
- data_info['hourly']['dataset']['GLBu0.08']['version']['93.0']['url'] = url_930_u
139
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
137
140
  # GLBy0.08 93.0
138
141
  uv3z_930_y = {}
139
142
  ts3z_930_y = {}
140
143
  ssh_930_y = {}
141
144
  for y_930_y in range(2018, 2025):
142
- uv3z_930_y[str(
143
- y_930_y)] = rf'https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?'
144
- ts3z_930_y[str(
145
- y_930_y)] = rf'https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?'
146
- ssh_930_y[str(
147
- y_930_y)] = rf'https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?'
145
+ uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
146
+ ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
147
+ ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
148
148
  url_930_y = {
149
- 'uv3z': uv3z_930_y,
150
- 'ts3z': ts3z_930_y,
151
- 'ssh': ssh_930_y,
149
+ "uv3z": uv3z_930_y,
150
+ "ts3z": ts3z_930_y,
151
+ "ssh": ssh_930_y,
152
152
  }
153
- data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['url'] = url_930_y
153
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
154
154
 
155
155
  var_group = {
156
- 'uv3z': ['u', 'v', 'u_b', 'v_b'],
157
- 'ts3z': ['temp', 'salt', 'temp_b', 'salt_b'],
158
- 'ssh': ['ssh'],
156
+ "uv3z": ["u", "v", "u_b", "v_b"],
157
+ "ts3z": ["temp", "salt", "temp_b", "salt_b"],
158
+ "ssh": ["ssh"],
159
159
  }
160
160
 
161
161
 
@@ -164,21 +164,23 @@ def draw_time_range(pic_save_folder=None):
164
164
  os.makedirs(pic_save_folder, exist_ok=True)
165
165
  # Converting the data into a format suitable for plotting
166
166
  data = []
167
- for dataset, versions in data_info['hourly']['dataset'].items():
168
- for version, time_range in versions['version'].items():
169
- t_s = time_range['time_range']['time_start']
170
- t_e = time_range['time_range']['time_end']
167
+ for dataset, versions in data_info["hourly"]["dataset"].items():
168
+ for version, time_range in versions["version"].items():
169
+ t_s = time_range["time_range"]["time_start"]
170
+ t_e = time_range["time_range"]["time_end"]
171
171
  if len(t_s) == 8:
172
- t_s = t_s + '00'
172
+ t_s = t_s + "00"
173
173
  if len(t_e) == 8:
174
- t_e = t_e + '21'
175
- t_s, t_e = t_s + '0000', t_e + '0000'
176
- data.append({
177
- 'dataset': dataset,
178
- 'version': version,
179
- 'start_date': pd.to_datetime(t_s),
180
- 'end_date': pd.to_datetime(t_e),
181
- })
174
+ t_e = t_e + "21"
175
+ t_s, t_e = t_s + "0000", t_e + "0000"
176
+ data.append(
177
+ {
178
+ "dataset": dataset,
179
+ "version": version,
180
+ "start_date": pd.to_datetime(t_s),
181
+ "end_date": pd.to_datetime(t_e),
182
+ }
183
+ )
182
184
 
183
185
  # Creating a DataFrame
184
186
  df = pd.DataFrame(data)
@@ -187,7 +189,7 @@ def draw_time_range(pic_save_folder=None):
187
189
  plt.figure(figsize=(12, 6))
188
190
 
189
191
  # Combined labels for datasets and versions
190
- combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df['dataset'], df['version'])]
192
+ combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df["dataset"], df["version"])]
191
193
 
192
194
  colors = plt.cm.viridis(np.linspace(0, 1, len(combined_labels)))
193
195
 
@@ -197,40 +199,40 @@ def draw_time_range(pic_save_folder=None):
197
199
  # Plotting each time range
198
200
  k = 1
199
201
  for _, row in df.iterrows():
200
- plt.plot([row['start_date'], row['end_date']], [k, k], color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
202
+ plt.plot([row["start_date"], row["end_date"]], [k, k], color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
201
203
  # plt.text(row['end_date'], k,
202
204
  # f"{row['version']}", ha='right', color='black')
203
- ymdh_s = row['start_date'].strftime('%Y-%m-%d %H')
204
- ymdh_e = row['end_date'].strftime('%Y-%m-%d %H')
205
+ ymdh_s = row["start_date"].strftime("%Y-%m-%d %H")
206
+ ymdh_e = row["end_date"].strftime("%Y-%m-%d %H")
205
207
  if k == 1 or k == len(combined_labels):
206
- plt.text(row['start_date'], k+0.125, f"{ymdh_s}", ha='left', color='black')
207
- plt.text(row['end_date'], k+0.125, f"{ymdh_e}", ha='right', color='black')
208
+ plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="left", color="black")
209
+ plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="right", color="black")
208
210
  else:
209
- plt.text(row['start_date'], k+0.125, f"{ymdh_s}", ha='right', color='black')
210
- plt.text(row['end_date'], k+0.125, f"{ymdh_e}", ha='left', color='black')
211
+ plt.text(row["start_date"], k + 0.125, f"{ymdh_s}", ha="right", color="black")
212
+ plt.text(row["end_date"], k + 0.125, f"{ymdh_e}", ha="left", color="black")
211
213
  k += 1
212
214
 
213
215
  # Setting the y-axis labels
214
- plt.yticks(range(1, len(combined_labels)+1), combined_labels)
215
- plt.xlabel('Time')
216
- plt.ylabel('Dataset - Version')
217
- plt.title('Time Range of Different Versions of Datasets')
216
+ plt.yticks(range(1, len(combined_labels) + 1), combined_labels)
217
+ plt.xlabel("Time")
218
+ plt.ylabel("Dataset - Version")
219
+ plt.title("Time Range of Different Versions of Datasets")
218
220
  plt.xticks(rotation=45)
219
221
  plt.grid(True)
220
222
  plt.tight_layout()
221
223
  if pic_save_folder:
222
- plt.savefig(Path(pic_save_folder) / 'HYCOM_time_range.png')
223
- print(f'[bold green]HYCOM_time_range.png has been saved in {pic_save_folder}')
224
+ plt.savefig(Path(pic_save_folder) / "HYCOM_time_range.png")
225
+ print(f"[bold green]HYCOM_time_range.png has been saved in {pic_save_folder}")
224
226
  else:
225
- plt.savefig('HYCOM_time_range.png')
226
- print('[bold green]HYCOM_time_range.png has been saved in the current folder')
227
- print(f'Current folder: {os.getcwd()}')
227
+ plt.savefig("HYCOM_time_range.png")
228
+ print("[bold green]HYCOM_time_range.png has been saved in the current folder")
229
+ print(f"Current folder: {os.getcwd()}")
228
230
  # plt.show()
229
231
  plt.close()
230
232
 
231
233
 
232
- def get_time_list(time_s, time_e, delta, interval_type='hour'):
233
- '''
234
+ def get_time_list(time_s, time_e, delta, interval_type="hour"):
235
+ """
234
236
  Description: get a list of time strings from time_s to time_e with a specified interval
235
237
  Args:
236
238
  time_s: start time string, e.g. '2023080203' for hours or '20230802' for days
@@ -239,14 +241,14 @@ def get_time_list(time_s, time_e, delta, interval_type='hour'):
239
241
  interval_type: 'hour' for hour interval, 'day' for day interval
240
242
  Returns:
241
243
  dt_list: a list of time strings
242
- '''
244
+ """
243
245
  time_s, time_e = str(time_s), str(time_e)
244
- if interval_type == 'hour':
245
- time_format = '%Y%m%d%H'
246
- delta_type = 'hours'
247
- elif interval_type == 'day':
248
- time_format = '%Y%m%d'
249
- delta_type = 'days'
246
+ if interval_type == "hour":
247
+ time_format = "%Y%m%d%H"
248
+ delta_type = "hours"
249
+ elif interval_type == "day":
250
+ time_format = "%Y%m%d"
251
+ delta_type = "days"
250
252
  # Ensure time strings are in the correct format for days
251
253
  time_s = time_s[:8]
252
254
  time_e = time_e[:8]
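As context for the get_time_list docstring above, a hedged usage sketch (import path inferred from the FilePath header; outputs are what the docstring implies, not the result of a live run):

```python
from oafuncs.oa_down.hycom_3hourly import get_time_list

# 3-hourly stamps between two YYYYMMDDHH strings, endpoints inclusive
get_time_list("2023080100", "2023080109", 3, "hour")
# expected: ['2023080100', '2023080103', '2023080106', '2023080109']

# daily stamps; longer inputs are truncated to YYYYMMDD first (see time_s[:8] above)
get_time_list("20230801", "20230803", 1, "day")
# expected: ['20230801', '20230802', '20230803']
```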
@@ -264,50 +266,50 @@ def get_time_list(time_s, time_e, delta, interval_type='hour'):
264
266
  def transform_time(time_str):
265
267
  # old_time = '2023080203'
266
268
  # time_new = '2023-08-02T03%3A00%3A00Z'
267
- time_new = f'{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z'
269
+ time_new = f"{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z"
268
270
  return time_new
269
271
 
270
272
 
271
- def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode='single_depth', depth=None, level_num=None):
273
+ def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
272
274
  query_dict = {
273
- 'var': variable_info[var]['var_name'],
274
- 'north': lat_max,
275
- 'west': lon_min,
276
- 'east': lon_max,
277
- 'south': lat_min,
278
- 'horizStride': 1,
279
- 'time': None,
280
- 'time_start': None,
281
- 'time_end': None,
282
- 'timeStride': None,
283
- 'vertCoord': None,
284
- 'vertStride': None,
285
- 'addLatLon': 'true',
286
- 'accept': 'netcdf4',
275
+ "var": variable_info[var]["var_name"],
276
+ "north": lat_max,
277
+ "west": lon_min,
278
+ "east": lon_max,
279
+ "south": lat_min,
280
+ "horizStride": 1,
281
+ "time": None,
282
+ "time_start": None,
283
+ "time_end": None,
284
+ "timeStride": None,
285
+ "vertCoord": None,
286
+ "vertStride": None,
287
+ "addLatLon": "true",
288
+ "accept": "netcdf4",
287
289
  }
288
290
 
289
291
  if time_str_end is not None:
290
- query_dict['time_start'] = transform_time(time_str_ymdh)
291
- query_dict['time_end'] = transform_time(time_str_end)
292
- query_dict['timeStride'] = 1
292
+ query_dict["time_start"] = transform_time(time_str_ymdh)
293
+ query_dict["time_end"] = transform_time(time_str_end)
294
+ query_dict["timeStride"] = 1
293
295
  else:
294
- query_dict['time'] = transform_time(time_str_ymdh)
296
+ query_dict["time"] = transform_time(time_str_ymdh)
295
297
 
296
298
  def get_nearest_level_index(depth):
297
299
  level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
298
- return min(range(len(level_depth)), key=lambda i: abs(level_depth[i]-depth))
300
+ return min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - depth))
299
301
 
300
- if var not in ['ssh', 'u_b', 'v_b', 'temp_b', 'salt_b'] and var in ['u', 'v', 'temp', 'salt']:
301
- if mode == 'depth':
302
+ if var not in ["ssh", "u_b", "v_b", "temp_b", "salt_b"] and var in ["u", "v", "temp", "salt"]:
303
+ if mode == "depth":
302
304
  if depth < 0 or depth > 5000:
303
- print('Please ensure the depth is in the range of 0-5000 m')
304
- query_dict['vertCoord'] = get_nearest_level_index(depth) + 1
305
- elif mode == 'level':
305
+ print("Please ensure the depth is in the range of 0-5000 m")
306
+ query_dict["vertCoord"] = get_nearest_level_index(depth) + 1
307
+ elif mode == "level":
306
308
  if level_num < 1 or level_num > 40:
307
- print('Please ensure the level_num is in the range of 1-40')
308
- query_dict['vertCoord'] = max(1, min(level_num, 40))
309
- elif mode == 'full':
310
- query_dict['vertStride'] = 1
309
+ print("Please ensure the level_num is in the range of 1-40")
310
+ query_dict["vertCoord"] = max(1, min(level_num, 40))
311
+ elif mode == "full":
312
+ query_dict["vertStride"] = 1
311
313
  else:
312
314
  raise ValueError("Invalid mode. Choose from 'depth', 'level', or 'full'")
313
315
 
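A worked example of the depth snapping in get_query_dict above (values read off the level_depth table; the +1 shift presumably matches a 1-based vertCoord on the NCSS side, an inference rather than a verified fact):

```python
# depth=100.0 m -> nearest level_depth entry is 100.0 at 0-based index 19,
# so query_dict["vertCoord"] becomes 19 + 1 = 20
level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0,
               40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]  # first 20 of the 40 levels
idx = min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - 100.0))
assert idx == 19
```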
@@ -328,13 +330,13 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
328
330
  else:
329
331
  time_start = int(time_input)
330
332
  time_end = int(time_end)
331
- time_input_str = f'{time_input}-{time_end}'
333
+ time_input_str = f"{time_input}-{time_end}"
332
334
 
333
335
  # Pad the time format according to its string length
334
336
  if len(str(time_start)) == 8:
335
- time_start = str(time_start) + '00'
337
+ time_start = str(time_start) + "00"
336
338
  if len(str(time_end)) == 8:
337
- time_end = str(time_end) + '21'
339
+ time_end = str(time_end) + "21"
338
340
  time_start, time_end = int(time_start), int(time_end)
339
341
 
340
342
  d_list = []
@@ -343,75 +345,75 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
343
345
  have_data = False
344
346
 
345
347
  # Iterate over every dataset and version
346
- for dataset_name in data_info['hourly']['dataset'].keys():
347
- for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
348
- time_s, time_e = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
348
+ for dataset_name in data_info["hourly"]["dataset"].keys():
349
+ for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
350
+ time_s, time_e = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
349
351
  time_s, time_e = str(time_s), str(time_e)
350
352
  if len(time_s) == 8:
351
- time_s = time_s + '00'
353
+ time_s = time_s + "00"
352
354
  if len(time_e) == 8:
353
- time_e = time_e + '21'
355
+ time_e = time_e + "21"
354
356
  # Check whether the time falls within the dataset's time range
355
357
  if is_single_time:
356
358
  if time_start >= int(time_s) and time_start <= int(time_e):
357
359
  d_list.append(dataset_name)
358
360
  v_list.append(version_name)
359
- trange_list.append(f'{time_s}-{time_e}')
361
+ trange_list.append(f"{time_s}-{time_e}")
360
362
  have_data = True
361
363
  else:
362
364
  if time_start >= int(time_s) and time_end <= int(time_e):
363
365
  d_list.append(dataset_name)
364
366
  v_list.append(version_name)
365
- trange_list.append(f'{time_s}-{time_e}')
367
+ trange_list.append(f"{time_s}-{time_e}")
366
368
  have_data = True
367
369
 
368
370
  # Print the results
369
- print(f'[bold red]{time_input_str} is in the following dataset and version:')
371
+ print(f"[bold red]{time_input_str} is in the following dataset and version:")
370
372
  if have_data:
371
373
  for d, v, trange in zip(d_list, v_list, trange_list):
372
- print(f'[bold blue]{d} {v} {trange}')
374
+ print(f"[bold blue]{d} {v} {trange}")
373
375
  return True
374
376
  else:
375
- print(f'[bold red]{time_input_str} is not in any dataset and version')
377
+ print(f"[bold red]{time_input_str} is not in any dataset and version")
376
378
  return False
377
379
 
378
380
 
379
381
  def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
380
382
  # Pad the time format according to its string length
381
383
  if len(str(time_input)) == 8:
382
- time_input = str(time_input) + '00'
384
+ time_input = str(time_input) + "00"
383
385
  time_start = int(time_input)
384
386
  if time_end is not None:
385
387
  if len(str(time_end)) == 8:
386
- time_end = str(time_end) + '21'
388
+ time_end = str(time_end) + "21"
387
389
  time_end = int(time_end)
388
390
  else:
389
391
  time_end = time_start
390
392
 
391
393
  # Check that the specified dataset and version exist
392
- if dataset_name not in data_info['hourly']['dataset']:
393
- print(f'[bold red]Dataset {dataset_name} not found.')
394
+ if dataset_name not in data_info["hourly"]["dataset"]:
395
+ print(f"[bold red]Dataset {dataset_name} not found.")
394
396
  return False
395
- if version_name not in data_info['hourly']['dataset'][dataset_name]['version']:
396
- print(f'[bold red]Version {version_name} not found in dataset {dataset_name}.')
397
+ if version_name not in data_info["hourly"]["dataset"][dataset_name]["version"]:
398
+ print(f"[bold red]Version {version_name} not found in dataset {dataset_name}.")
397
399
  return False
398
400
 
399
401
  # Get the time range of the specified dataset and version
400
- time_range = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
402
+ time_range = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"]
401
403
  time_s, time_e = list(time_range.values())
402
404
  time_s, time_e = str(time_s), str(time_e)
403
405
  if len(time_s) == 8:
404
- time_s = time_s + '00'
406
+ time_s = time_s + "00"
405
407
  if len(time_e) == 8:
406
- time_e = time_e + '21'
408
+ time_e = time_e + "21"
407
409
  time_s, time_e = int(time_s), int(time_e)
408
410
 
409
411
  # Check whether the time falls within the specified dataset and version's time range
410
412
  if time_start >= time_s and time_end <= time_e:
411
- print(f'[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.')
413
+ print(f"[bold blue]Time {time_input} to {time_end} is within dataset {dataset_name} and version {version_name}.")
412
414
  return True
413
415
  else:
414
- print(f'[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.')
416
+ print(f"[bold red]Time {time_input} to {time_end} is not within dataset {dataset_name} and version {version_name}.")
415
417
  return False
416
418
 
417
419
 
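A quick sanity check of the lookup helper above, with the expected output reconstructed from the time_range table registered earlier (not from a live run):

```python
from oafuncs.oa_down.hycom_3hourly import check_time_in_dataset_and_version

# 2019010100 falls inside GLBv0.08 93.0 (2018010112-2020021909) and also inside
# GLBy0.08 93.0 (20181204, padded to 2018120400, through 20300904), so both
# dataset/version pairs should be printed and the call should return True.
check_time_in_dataset_and_version("2019010100")
```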
@@ -420,7 +422,7 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
420
422
  # Example structure: data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
421
423
 
422
424
  if len(str(time_input)) == 8:
423
- time_input = str(time_input) + '00'
425
+ time_input = str(time_input) + "00"
424
426
 
425
427
  # If time_end is None, assign it the value of time_input
426
428
  if time_end is None:
@@ -431,14 +433,14 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
431
433
 
432
434
  dataset_name_out, version_name_out = None, None
433
435
 
434
- for dataset_name in data_info['hourly']['dataset'].keys():
435
- for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
436
- [time_s, time_e] = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
436
+ for dataset_name in data_info["hourly"]["dataset"].keys():
437
+ for version_name in data_info["hourly"]["dataset"][dataset_name]["version"].keys():
438
+ [time_s, time_e] = list(data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["time_range"].values())
437
439
  time_s, time_e = str(time_s), str(time_e)
438
440
  if len(time_s) == 8:
439
- time_s = time_s + '00'
441
+ time_s = time_s + "00"
440
442
  if len(time_e) == 8:
441
- time_e = time_e + '21'
443
+ time_e = time_e + "21"
442
444
  time_s, time_e = int(time_s), int(time_e)
443
445
 
444
446
  # Check whether the time falls within this dataset version's time range
@@ -448,55 +450,55 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
448
450
  dataset_name_out, version_name_out = dataset_name, version_name
449
451
 
450
452
  if dataset_name_out is not None and version_name_out is not None:
451
- print(f'[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen')
453
+ print(f"[bold purple]dataset: {dataset_name_out}, version: {version_name_out} is chosen")
452
454
 
453
455
  # If no matching dataset and version are found, None is returned
454
456
  return dataset_name_out, version_name_out
455
457
 
456
458
 
457
459
  def get_base_url(dataset_name, version_name, var, year_str):
458
- url_dict = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['url']
459
- classification_method = data_info['hourly']['dataset'][dataset_name]['version'][version_name]['classification']
460
- if classification_method == 'year_different':
460
+ url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
461
+ classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
462
+ if classification_method == "year_different":
461
463
  base_url = url_dict[str(year_str)]
462
- elif classification_method == 'same_path':
464
+ elif classification_method == "same_path":
463
465
  base_url = url_dict
464
- elif classification_method == 'var_different':
466
+ elif classification_method == "var_different":
465
467
  base_url = None
466
468
  for key, value in var_group.items():
467
469
  if var in value:
468
470
  base_url = url_dict[key]
469
471
  break
470
472
  if base_url is None:
471
- print('Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]')
472
- elif classification_method == 'var_year_different':
473
+ print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
474
+ elif classification_method == "var_year_different":
473
475
  base_url = None
474
476
  for key, value in var_group.items():
475
477
  if var in value:
476
478
  base_url = url_dict[key][str(year_str)]
477
479
  break
478
480
  if base_url is None:
479
- print('Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]')
481
+ print("Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]")
480
482
  return base_url
481
483
 
482
484
 
483
485
  def get_submit_url(dataset_name, version_name, var, year_str, query_dict):
484
486
  base_url = get_base_url(dataset_name, version_name, var, year_str)
485
- if isinstance(query_dict['var'], str):
486
- query_dict['var'] = [query_dict['var']]
487
- target_url = base_url + '&'.join(f"var={var}" for var in query_dict['var']) + '&' + '&'.join(f"{key}={value}" for key, value in query_dict.items() if key != 'var')
487
+ if isinstance(query_dict["var"], str):
488
+ query_dict["var"] = [query_dict["var"]]
489
+ target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
488
490
  return target_url
489
491
 
490
492
 
491
493
  def clear_existing_file(file_full_path):
492
494
  if os.path.exists(file_full_path):
493
495
  os.remove(file_full_path)
494
- print(f'{file_full_path} has been removed')
496
+ print(f"{file_full_path} has been removed")
495
497
 
496
498
 
497
499
  def check_existing_file(file_full_path):
498
500
  if os.path.exists(file_full_path):
499
- print(f'[bold #FFA54F]{file_full_path} exists')
501
+ print(f"[bold #FFA54F]{file_full_path} exists")
500
502
  return True
501
503
  else:
502
504
  # print(f'{file_full_path} does not exist')
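One detail of get_submit_url above worth noting: every key except 'var' is appended verbatim, so any field still set to None is serialized literally as None in the query string. A minimal standalone reconstruction of the join (illustrative values; the base URL is one of the real endpoints listed earlier):

```python
query = {"var": ["water_u"], "north": 45, "west": 105, "east": 130, "south": 0,
         "horizStride": 1, "time": "2018-01-01T12%3A00%3A00Z", "vertStride": 1,
         "addLatLon": "true", "accept": "netcdf4"}
base = "https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?"
url = (base
       + "&".join(f"var={v}" for v in query["var"])
       + "&"
       + "&".join(f"{k}={v}" for k, v in query.items() if k != "var"))
# -> ...expt_93.0/uv3z?var=water_u&north=45&west=105&...&accept=netcdf4
```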
@@ -505,49 +507,95 @@ def check_existing_file(file_full_path):
505
507
 
506
508
  def get_ua():
507
509
  current_dir = os.path.dirname(os.path.abspath(__file__))
508
- ua_file_txt = os.path.join(current_dir, 'User_Agent-list.txt')
510
+ ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
509
511
 
510
- with open(ua_file_txt, 'r') as f:
512
+ with open(ua_file_txt, "r") as f:
511
513
  ua_list = f.readlines()
512
514
  # Strip newlines and drop blank lines
513
515
  ua_list = [line.strip() for line in ua_list if line.strip()]
514
516
 
515
- return random.choice(ua_list)
517
+ # if current_platform == 'Linux':
518
+ # ua_list = [line for line in ua_list if 'Linux' in line]
516
519
 
520
+ return random.choice(ua_list)
517
521
 
518
522
 
519
- def get_proxy():
523
+ def get_proxy_file():
520
524
  # Get the absolute path of the current script
521
525
  script_dir = os.path.dirname(os.path.abspath(__file__))
522
526
  # Build the absolute path of ip.txt
523
- ip_file_txt = os.path.join(script_dir, 'ip.txt')
524
- with open(ip_file_txt, 'r') as f:
527
+ ip_file_txt = os.path.join(script_dir, "ip.txt")
528
+ with open(ip_file_txt, "r") as f:
525
529
  ips = f.readlines()
526
530
  ip_list = []
527
531
  for ip in ips:
528
532
  ip_list.append(ip.strip())
529
533
  choose_ip = random.choice(ip_list)
530
- proxies = {
531
- 'http': 'http://' + choose_ip,
532
- 'https': 'https://' + choose_ip
533
- }
534
+ proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
534
535
  # print(f'Using proxy: {proxies}')
535
536
  return proxies
536
537
 
537
538
 
538
- def dlownload_file(target_url, store_path, file_name, check=False):
539
- print(f'[bold #96cbd7]Downloading {file_name}...')
539
+ def scrape_and_categorize_proxies(choose_protocol="http"):
540
+ url = "https://topproxylinks.com/"
541
+ # Send an HTTP request to fetch the page content
542
+ response = requests.get(url)
543
+ # Parse the page with BeautifulSoup
544
+ soup = BeautifulSoup(response.text, "html.parser")
545
+
546
+ # Initialize a dict to store the proxies for each protocol
547
+ proxies_dict = {"http": [], "socks4": [], "socks5": []}
548
+
549
+ # Find all rows in the table
550
+ tbody = soup.find("tbody")
551
+
552
+ if tbody:
553
+ for row in tbody.find_all("tr"):
554
+ # Extract the protocol, proxy, and country cells
555
+ cells = row.find_all("td")
556
+ protocol = cells[0].text.strip().lower()
557
+ proxy = cells[1].text.strip()
558
+
559
+ # Store each proxy under its protocol
560
+ if protocol in proxies_dict:
561
+ proxies_dict[protocol].append(proxy)
562
+
563
+ if choose_protocol in proxies_dict:
564
+ proxies_list = proxies_dict[choose_protocol]
565
+ else:
566
+ proxies_list = proxies_dict["http"]
567
+
568
+ return proxies_list
569
+
570
+ def get_proxy():
571
+ ip_list = scrape_and_categorize_proxies(choose_protocol="http")
572
+ choose_ip = random.choice(ip_list)
573
+ proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
574
+ print(f'Using proxy: {proxies}')
575
+ return proxies
576
+
577
+
578
+ def download_file(target_url, store_path, file_name, check=False):
579
+ # Check if the file exists
580
+ fname = Path(store_path) / file_name
581
+ if check:
582
+ if check_existing_file(fname):
583
+ count_dict["skip"] += 1
584
+ return
585
+ clear_existing_file(fname)
586
+
587
+ # -----------------------------------------------
588
+ print(f"[bold #f0f6d0]Requesting {file_name}...")
540
589
  # Create a session
541
590
  s = requests.Session()
542
591
  download_success = False
543
592
  request_times = 0
544
- filename = Path(store_path) / file_name
545
593
 
546
- def calculate_wait_time(time_str):
594
+ def calculate_wait_time(time_str, target_url):
547
595
  import re
548
596
 
549
597
  # Regex that matches times in YYYYMMDDHH format
550
- time_pattern = r'\d{10}'
598
+ time_pattern = r"\d{10}"
551
599
 
552
600
  # Two example strings:
553
601
  # str1 = 'HYCOM_water_u_2018010100_2018010112.nc'
@@ -560,46 +608,65 @@ def dlownload_file(target_url, store_path, file_name, check=False):
560
608
  num_times_str = len(times_in_str)
561
609
 
562
610
  if num_times_str > 1:
563
- delta_t = datetime.datetime.strptime(times_in_str[1], '%Y%m%d%H') - datetime.datetime.strptime(times_in_str[0], '%Y%m%d%H')
611
+ delta_t = datetime.datetime.strptime(times_in_str[1], "%Y%m%d%H") - datetime.datetime.strptime(times_in_str[0], "%Y%m%d%H")
564
612
  delta_t = delta_t.total_seconds() / 3600
565
613
  delta_t = delta_t / 3 + 1
566
614
  else:
567
615
  delta_t = 1
616
+ # Wait at most 5 minutes per variable: too short and the request may fail, too long and time is wasted
617
+ num_var = int(target_url.count("var="))
618
+ if num_var <= 0:
619
+ num_var = 1
620
+ return int(delta_t * 5 * 60 * num_var)
568
621
 
569
- return int(delta_t*15)
622
+ max_timeout = calculate_wait_time(file_name, target_url)
623
+ print(f"[bold #912dbc]Max timeout: {max_timeout} seconds")
570
624
 
571
- max_timeout = calculate_wait_time(file_name)
572
-
573
- if check:
574
- if check_existing_file(filename):
575
- return
576
- clear_existing_file(filename)
577
625
  # print(f'Download_start_time: {datetime.datetime.now()}')
578
626
  download_time_s = datetime.datetime.now()
579
- order_list = ['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']
627
+ order_list = ["1st", "2nd", "3rd", "4th", "5th", "6th", "7th", "8th", "9th", "10th"]
580
628
  while not download_success:
581
629
  if request_times >= 10:
582
630
  # print(f'Download failed after {request_times} retries\nYou can skip it for now and retry later')
583
- print(f'[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later')
631
+ print(f"[bold #ffe5c0]Download failed after {request_times} times\nYou can skip it and try again later")
632
+ count_dict["fail"] += 1
584
633
  break
585
634
  if request_times > 0:
586
635
  # print(f'\rRetrying, attempt {request_times}', end="")
587
- print(f'[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...')
636
+ print(f"[bold #ffe5c0]Retrying the {order_list[request_times-1]} time...")
588
637
  # Try to download the file
589
638
  try:
590
- headers = {'User-Agent': get_ua()}
591
- response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
639
+ headers = {"User-Agent": get_ua()}
640
+ """ response = s.get(target_url, headers=headers, timeout=random.randint(5, max_timeout))
592
641
  response.raise_for_status() # raises HTTPError if the response status is not 200
593
642
 
594
643
  # Save the file
595
644
  with open(filename, 'wb') as f:
596
- f.write(response.content)
597
- # print(f'\rFile {filename} downloaded successfully', end="")
598
- if os.path.exists(filename):
645
+ f.write(response.content) """
646
+
647
+ if find_proxy:
648
+ proxies = get_proxy()
649
+ response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
650
+ else:
651
+ response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout)) # enable streaming
652
+ response.raise_for_status() # raises HTTPError if the response status is not 200
653
+
654
+ # Save the file
655
+ with open(fname, "wb") as f:
656
+ print(f"[bold #96cbd7]Downloading {file_name}...")
657
+ for chunk in response.iter_content(chunk_size=1024):
658
+ if chunk:
659
+ f.write(chunk)
660
+
661
+ f.close()
662
+
663
+ # print(f'\rFile {fname} downloaded successfully', end="")
664
+ if os.path.exists(fname):
599
665
  download_success = True
600
666
  download_time_e = datetime.datetime.now()
601
667
  download_delta = download_time_e - download_time_s
602
- print(f'[#65b168]File [bold #dfff73]{filename} [#65b168]has been downloaded successfully, Time: [#39cbdd]{download_delta}')
668
+ print(f"[#3dfc40]File [bold #dfff73]{fname} [#3dfc40]has been downloaded successfully, Time: [#39cbdd]{download_delta}")
669
+ count_dict["success"] += 1
603
670
  # print(f'Download_end_time: {datetime.datetime.now()}')
604
671
 
605
672
  except requests.exceptions.HTTPError as errh:
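To make the new timeout rule concrete: a name like HYCOM_water_u_2018010100_2018010112.nc contains two YYYYMMDDHH stamps 12 hours apart, so delta_t = 12 / 3 + 1 = 5; with a single var= parameter in the URL, the cap is int(5 * 5 * 60 * 1) = 1500 seconds (25 minutes), where the old rule allowed only int(5 * 15) = 75 seconds.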
@@ -636,17 +703,17 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
636
703
  download_time_str = str(download_time)
637
704
 
638
705
  if len(download_time_str) == 8:
639
- download_time_str = download_time_str + '00'
706
+ download_time_str = download_time_str + "00"
640
707
 
641
708
  # Check whether the hour is valid (if needed)
642
709
  if download_time_end is None and not check_hour_is_valid(download_time_str):
643
- print('Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21')
644
- raise ValueError('The hour is invalid')
710
+ print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
711
+ raise ValueError("The hour is invalid")
645
712
 
646
713
  # Set the time range depending on whether a whole day is being checked
647
714
  if download_time_end is not None:
648
715
  if len(str(download_time_end)) == 8:
649
- download_time_end = str(download_time_end) + '21'
716
+ download_time_end = str(download_time_end) + "21"
650
717
  have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
651
718
  if have_data:
652
719
  return direct_choose_dataset_and_version(download_time_str, download_time_end)
@@ -661,26 +728,25 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
661
728
  def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
662
729
  year_str = str(download_time)[:4]
663
730
  if depth is not None and level_num is not None:
664
- print('Please ensure the depth or level_num is None')
665
- print('Progress will use the depth')
666
- which_mode = 'depth'
731
+ print("Please ensure the depth or level_num is None")
732
+ print("Progress will use the depth")
733
+ which_mode = "depth"
667
734
  elif depth is not None and level_num is None:
668
- print(f'Data of single depth (~{depth} m) will be downloaded...')
669
- which_mode = 'depth'
735
+ print(f"Data of single depth (~{depth} m) will be downloaded...")
736
+ which_mode = "depth"
670
737
  elif level_num is not None and depth is None:
671
- print(f'Data of single level ({level_num}) will be downloaded...')
672
- which_mode = 'level'
738
+ print(f"Data of single level ({level_num}) will be downloaded...")
739
+ which_mode = "level"
673
740
  else:
674
- print('Full depth or full level data will be downloaded...')
675
- which_mode = 'full'
741
+ print("Full depth or full level data will be downloaded...")
742
+ which_mode = "full"
676
743
  query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
677
- submit_url = get_submit_url(
678
- dataset_name, version_name, var, year_str, query_dict)
744
+ submit_url = get_submit_url(dataset_name, version_name, var, year_str, query_dict)
679
745
  return submit_url
680
746
 
681
747
 
682
- def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time='2024083100', download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
683
- print('[bold #ecdbfe]-'*160)
748
+ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
749
+ print("[bold #ecdbfe]-" * 160)
684
750
  download_time = str(download_time)
685
751
  if download_time_end is not None:
686
752
  download_time_end = str(download_time_end)
@@ -688,6 +754,11 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
688
754
  else:
689
755
  dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
690
756
  if dataset_name is None and version_name is None:
757
+ count_dict["no_data"] += 1
758
+ if download_time_end is not None:
759
+ count_dict["no_data_list"].append(f"{download_time}-{download_time_end}")
760
+ else:
761
+ count_dict["no_data_list"].append(download_time)
691
762
  return
692
763
 
693
764
  if isinstance(var, str):
@@ -700,7 +771,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
700
771
  file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
701
772
  if download_time_end is not None:
702
773
  file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}_{download_time_end}.nc"
703
- dlownload_file(submit_url, store_path, file_name, check)
774
+ download_file(submit_url, store_path, file_name, check)
704
775
  else:
705
776
  varlist = [_ for _ in var]
706
777
  for key, value in var_group.items():
@@ -721,24 +792,24 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
721
792
  new_str = f'{new_str}&var={variable_info[v]["var_name"]}'
722
793
  submit_url = submit_url.replace(old_str, new_str)
723
794
  # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
724
- file_name = f'HYCOM_{key}_{download_time}.nc'
795
+ file_name = f"HYCOM_{key}_{download_time}.nc"
725
796
  if download_time_end is not None:
726
- file_name = f'HYCOM_{key}_{download_time}_{download_time_end}.nc'
727
- dlownload_file(submit_url, store_path, file_name, check)
797
+ file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
798
+ download_file(submit_url, store_path, file_name, check)
728
799
 
729
800
 
730
801
  def convert_full_name_to_short_name(full_name):
731
802
  for var, info in variable_info.items():
732
- if full_name == info['var_name'] or full_name == info['standard_name'] or full_name == var:
803
+ if full_name == info["var_name"] or full_name == info["standard_name"] or full_name == var:
733
804
  return var
734
- print('[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b')
735
- print('or')
736
- print('[bold blue]water_u, water_v, water_temp, salinity, surf_el, water_u_bottom, water_v_bottom, water_temp_bottom, salinity_bottom')
805
+ print("[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b")
806
+ print("or")
807
+ print("[bold blue]water_u, water_v, water_temp, salinity, surf_el, water_u_bottom, water_v_bottom, water_temp_bottom, salinity_bottom")
737
808
  return False
738
809
 
739
810
 
740
811
  def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
741
- '''
812
+ """
742
813
  # Parallel download task
743
814
  # This wrapper exists for parallel downloading and is required; calling direct_download in parallel directly causes problems
744
815
 
@@ -746,13 +817,29 @@ def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_ma
746
817
  Here, download_task wraps each download task together with all the parameters it needs.
747
818
  That way every task is independent, with its own parameters and data, never sharing or mutating another task's data.
748
819
  So even when many tasks run at the same time, their data cannot get mixed up.
749
- '''
820
+ """
750
821
 
751
822
  prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
752
823
 
753
824
 
754
- def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
755
- '''
825
+ def done_callback(future, progress, task, total, counter_lock):
826
+ """
827
+ # Callback function for the parallel download tasks
828
+ # This function exists for parallel downloading and is required; calling direct_download in parallel directly causes problems
829
+
830
+ Callback: invoked when a task finishes, so the progress bar can be updated promptly to show completion.
831
+ Here, done_callback's job is to advance the progress bar each time a task completes.
832
+ That way, even with many tasks running concurrently, each task's completion is visible immediately instead of only after all of them finish.
833
+ """
834
+
835
+ global parallel_counter
836
+ with counter_lock:
837
+ parallel_counter += 1
838
+ progress.update(task, advance=1, description=f"[cyan]Downloading... {parallel_counter}/{total}")
839
+
840
+
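The Lock-guarded counter plus as_completed pattern introduced here can be exercised on its own; a minimal self-contained sketch of the same idea (a generic demo, not code from the package):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
from rich.progress import Progress

counter, lock = 0, Lock()

def on_done(progress, task, total):
    global counter
    with lock:  # serialize counter updates across worker completions
        counter += 1
        progress.update(task, advance=1, description=f"[cyan]Downloading... {counter}/{total}")

with Progress() as progress:
    task = progress.add_task("[cyan]Downloading...", total=4)
    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(pow, 2, n) for n in range(4)]
        for _ in as_completed(futures):  # yields each future as it finishes
            on_done(progress, task, 4)
```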
841
+ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
842
+ """
756
843
  Description:
757
844
  Download the data of single time or a series of time
758
845
 
@@ -773,13 +860,17 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
773
860
 
774
861
  Returns:
775
862
  None
776
- '''
863
+ """
777
864
  ymdh_time_s, ymdh_time_e = str(time_s), str(time_e)
865
+ if num_workers is not None and num_workers > 1: # for the progress display when downloading with multiple threads
866
+ global parallel_counter
867
+ parallel_counter = 0
868
+ counter_lock = Lock() # create a lock so the counter is thread-safe
778
869
  if ymdh_time_s == ymdh_time_e:
779
870
  prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name)
780
871
  elif int(ymdh_time_s) < int(ymdh_time_e):
781
- print('Downloading a series of files...')
782
- time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, 'hour')
872
+ print("Downloading a series of files...")
873
+ time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, "hour")
783
874
  with Progress() as progress:
784
875
  task = progress.add_task("[cyan]Downloading...", total=len(time_list))
785
876
  if ftimes == 1:
@@ -787,35 +878,39 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
787
878
  # Serial mode
788
879
  for i, time_str in enumerate(time_list):
789
880
  prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
790
- progress.update(task, advance=1, description=f'[cyan]Downloading... {i+1}/{len(time_list)}')
881
+ progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
791
882
  else:
792
883
  # Parallel mode
793
884
  with ThreadPoolExecutor(max_workers=num_workers) as executor:
794
885
  futures = [executor.submit(download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
795
- for i, future in enumerate(futures):
796
- future.add_done_callback(lambda _: progress.update(task, advance=1, description=f'[cyan]Downloading... {i+1}/{len(time_list)}'))
886
+ """ for i, future in enumerate(futures):
887
+ future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")) """
888
+ for feature in as_completed(futures):
889
+ done_callback(feature, progress, task, len(time_list), counter_lock)
797
890
  else:
798
- new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3*ftimes, 'hour')
891
+ new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
799
892
  total_num = len(new_time_list)
800
893
  if num_workers is None or num_workers <= 1:
801
894
  # Serial mode
802
895
  for i, time_str in enumerate(new_time_list):
803
- time_str_end_index = int(min(len(time_list)-1, int(i*ftimes+ftimes-1)))
896
+ time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
804
897
  time_str_end = time_list[time_str_end_index]
805
898
  prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
806
- progress.update(task, advance=1, description=f'[cyan]Downloading... {i+1}/{total_num}')
899
+ progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
807
900
  else:
808
901
  # Parallel mode
809
902
  with ThreadPoolExecutor(max_workers=num_workers) as executor:
810
- futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list)-1, int(i*ftimes+ftimes-1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
811
- for i, future in enumerate(futures):
812
- future.add_done_callback(lambda _: progress.update(task, advance=1, description=f'[cyan]Downloading... {i+1}/{total_num}'))
903
+ futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
904
+ """ for i, future in enumerate(futures):
905
+ future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")) """
906
+ for feature in as_completed(futures):
907
+ done_callback(feature, progress, task, len(time_list), counter_lock)
813
908
  else:
814
- print('Please ensure the time_s is no more than time_e')
909
+ print("Please ensure the time_s is no more than time_e")
815
910
 
816
911
 
817
- def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
818
- '''
912
+ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
913
+ """
819
914
  Description:
820
915
  Download the data of single time or a series of time
821
916
 
@@ -838,20 +933,20 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
838
933
 
839
934
  Returns:
840
935
  None
841
- '''
936
+ """
842
937
  # Print info and handle the dataset and version names
843
938
  if dataset_name is None and version_name is None:
844
- print('The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.')
845
- print('If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.')
939
+ print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
940
+ print("If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.")
846
941
  elif dataset_name is None and version_name is not None:
847
- print('Please ensure the dataset_name is not None')
848
- print('If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.')
942
+ print("Please ensure the dataset_name is not None")
943
+ print("If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.")
849
944
  elif dataset_name is not None and version_name is None:
850
- print('Please ensure the version_name is not None')
851
- print('If you do not add the version_name, both the dataset and version will be chosen according to the download_time.')
945
+ print("Please ensure the version_name is not None")
946
+ print("If you do not add the version_name, both the dataset and version will be chosen according to the download_time.")
852
947
  else:
853
- print('The dataset_name and version_name are both set by yourself.')
854
- print('Please ensure the dataset_name and version_name are correct.')
948
+ print("The dataset_name and version_name are both set by yourself.")
949
+ print("Please ensure the dataset_name and version_name are correct.")
855
950
 
856
951
  if isinstance(var, list):
857
952
  if len(var) == 1:
@@ -861,17 +956,17 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
861
956
  elif isinstance(var, str):
862
957
  var = convert_full_name_to_short_name(var)
863
958
  else:
864
- raise ValueError('The var is invalid')
959
+ raise ValueError("The var is invalid")
865
960
  if var is False:
866
- raise ValueError('The var is invalid')
961
+ raise ValueError("The var is invalid")
867
962
  if lon_min < 0 or lon_min > 359.92 or lon_max < 0 or lon_max > 359.92 or lat_min < -80 or lat_min > 90 or lat_max < -80 or lat_max > 90:
868
- print('Please ensure the lon_min, lon_max, lat_min, lat_max are in the range')
869
- print('The range of lon_min, lon_max is 0~359.92')
870
- print('The range of lat_min, lat_max is -80~90')
871
- raise ValueError('The lon or lat is invalid')
963
+ print("Please ensure the lon_min, lon_max, lat_min, lat_max are in the range")
964
+ print("The range of lon_min, lon_max is 0~359.92")
965
+ print("The range of lat_min, lat_max is -80~90")
966
+ raise ValueError("The lon or lat is invalid")
872
967
 
873
968
  if ftimes != 1:
874
- print('Please ensure the ftimes is in [1, 8]')
969
+ print("Please ensure the ftimes is in [1, 8]")
875
970
  ftimes = max(min(ftimes, 8), 1)
876
971
 
877
972
  if store_path is None:
@@ -884,19 +979,43 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
884
979
 
885
980
  time_s = str(time_s)
886
981
  if len(time_s) == 8:
887
- time_s += '00'
982
+ time_s += "00"
888
983
  if time_e is None:
889
984
  time_e = time_s[:]
890
985
  else:
891
986
  time_e = str(time_e)
892
987
  if len(time_e) == 8:
893
- time_e += '21'
988
+ time_e += "21"
989
+
990
+ global count_dict
991
+ count_dict = {"success": 0, "fail": 0, "skip": 0, "no_data": 0, "total": 0, "no_data_list": []}
992
+
993
+ """ global current_platform
994
+ current_platform = platform.system() """
995
+
996
+ global find_proxy
997
+ find_proxy = False
894
998
 
895
999
  download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
896
1000
 
1001
+ count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
1002
+
1003
+ print("[bold #ecdbfe]-" * 160)
1004
+ print(f"[bold #ff80ab]Total: {count_dict['total']}\nSuccess: {count_dict['success']}\nFail: {count_dict['fail']}\nSkip: {count_dict['skip']}")
1005
+ if count_dict["fail"] > 0:
1006
+ print("[bold #be5528]Please try again to download the failed data later")
1007
+ if count_dict["no_data"] > 0:
1008
+ if count_dict["no_data"] == 1:
1009
+ print(f"[bold #f90000]There is {count_dict['no_data']} data that does not exist in any dataset and version")
1010
+ else:
1011
+ print(f"[bold #f90000]These are {count_dict['no_data']} data that do not exist in any dataset and version")
1012
+ for no_data in count_dict["no_data_list"]:
1013
+ print(f"[bold #d81b60]{no_data}")
1014
+ print("[bold #ecdbfe]-" * 160)
1015
+
897
1016
 
898
1017
  def how_to_use():
899
- print('''
1018
+ print("""
900
1019
  # 1. Choose the dataset and version according to the time:
901
1020
  # 1.1 Use function to query
902
1021
  You can use the function check_time_in_dataset_and_version(time_input=20241101) to find the dataset and version according to the time.
@@ -941,51 +1060,51 @@ def how_to_use():
941
1060
  # 7.2 You can download data for a single time or a series of times
942
1061
  # 7.3 The parameters you must set are var and time_s; time_e is optional and defaults to time_s
943
1062
  # 7.4 Example: download('u', '2024110112', '2024110212', lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None)
944
- ''')
1063
+ """)
945
1064
 
946
1065
 
947
- if __name__ == '__main__':
1066
+ if __name__ == "__main__":
948
1067
  # help(hycom3h.download)
949
- time_s, time_e = '2016070100', '2019123121'
950
- merge_name = '2018_2024'
951
- root_path = r'G:\Data\HYCOM\3hourly'
952
- location_dict = {'west': 105, 'east': 130, 'south': 15, 'north': 45}
1068
+ time_s, time_e = "2018070100", "2019123121"
1069
+ merge_name = f"{time_s}_{time_e}"  # name of the merged file
1070
+ root_path = r"G:\Data\HYCOM\3hourly"
1071
+ location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
953
1072
  download_dict = {
954
- 'water_u': {'simple_name': 'u', 'download': 1},
955
- 'water_v': {'simple_name': 'v', 'download': 1},
956
- 'surf_el': {'simple_name': 'ssh', 'download': 1},
957
- 'water_temp': {'simple_name': 'temp', 'download': 1},
958
- 'salinity': {'simple_name': 'salt', 'download': 1},
959
- 'water_u_bottom': {'simple_name': 'u_b', 'download': 0},
960
- 'water_v_bottom': {'simple_name': 'v_b', 'download': 0},
961
- 'water_temp_bottom': {'simple_name': 'temp_b', 'download': 0},
962
- 'salinity_bottom': {'simple_name': 'salt_b', 'download': 0},
1073
+ "water_u": {"simple_name": "u", "download": 1},
1074
+ "water_v": {"simple_name": "v", "download": 1},
1075
+ "surf_el": {"simple_name": "ssh", "download": 1},
1076
+ "water_temp": {"simple_name": "temp", "download": 1},
1077
+ "salinity": {"simple_name": "salt", "download": 1},
1078
+ "water_u_bottom": {"simple_name": "u_b", "download": 0},
1079
+ "water_v_bottom": {"simple_name": "v_b", "download": 0},
1080
+ "water_temp_bottom": {"simple_name": "temp_b", "download": 0},
1081
+ "salinity_bottom": {"simple_name": "salt_b", "download": 0},
963
1082
  }
964
1083
 
965
1084
  var_list = []
966
1085
  for var_name in download_dict.keys():
967
- if download_dict[var_name]['download'] == 1:
1086
+ if download_dict[var_name]["download"] == 1:
968
1087
  var_list.append(var_name)
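As a side note, the selection loop above is equivalent to a single comprehension over download_dict, shown here only as an optional restatement:

var_list = [name for name, cfg in download_dict.items() if cfg["download"] == 1]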
969
1088
 
970
1089
  # set depth or level; only one of them may be set
971
1090
  # to download all depths or levels, leave both as None
972
1091
  depth = None # or 0-5000 meters
973
1092
  level = None # or 1-40 levels
974
- num_workers = 1
1093
+ num_workers = 3
975
1094
 
976
1095
  check = True
977
1096
  ftimes = 1
978
1097
 
979
1098
  download_switch, single_var = True, False
980
1099
  combine_switch = False
981
- copy_switch, copy_dir = False, r'G:\Data\HYCOM\3hourly'
1100
+ copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
982
1101
 
983
1102
  if download_switch:
984
1103
  if single_var:
985
1104
  for var_name in var_list:
986
- download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict['west'], lon_max=location_dict['east'], lat_min=location_dict['south'], lat_max=location_dict['north'], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
1105
+ download(var=var_name, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
987
1106
  else:
988
- download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict['west'], lon_max=location_dict['east'], lat_min=location_dict['south'], lat_max=location_dict['north'], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
1107
+ download(var=var_list, time_s=time_s, time_e=time_e, store_path=Path(root_path), lon_min=location_dict["west"], lon_max=location_dict["east"], lat_min=location_dict["south"], lat_max=location_dict["north"], num_workers=num_workers, check=check, depth=depth, level=level, ftimes=ftimes)
989
1108
 
990
1109
  """ if combine_switch or copy_switch:
991
1110
  time_list = get_time_list(time_s, time_e, 3, 'hour')