oafuncs 0.0.89__py2.py3-none-any.whl → 0.0.91__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
2
2
  # coding=utf-8
3
3
  """
4
4
  Author: Liu Kun && 16031215@qq.com
5
- Date: 2024-11-01 10:31:09
5
+ Date: 2024-11-02 11:07:49
6
6
  LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2024-12-08 10:20:45
7
+ LastEditTime: 2025-01-07 16:31:36
8
8
  FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
9
9
  Description:
10
10
  EditPlatform: vscode
@@ -16,182 +16,189 @@ Python Version: 3.12
16
16
  import datetime
17
17
  import os
18
18
  import random
19
+ import re
19
20
  import time
20
21
  import warnings
21
22
  from concurrent.futures import ThreadPoolExecutor, as_completed
22
23
  from pathlib import Path
23
24
  from threading import Lock
24
- import re
25
25
 
26
26
  import matplotlib.pyplot as plt
27
27
  import numpy as np
28
28
  import pandas as pd
29
29
  import requests
30
- from bs4 import BeautifulSoup
31
30
  from rich import print
32
31
  from rich.progress import Progress
33
- import glob
32
+
33
+ from oafuncs.oa_down.user_agent import get_ua
34
+ from oafuncs.oa_file import file_size, mean_size
35
+ from oafuncs.oa_nc import check as check_nc
34
36
 
35
37
  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
36
38
 
37
39
  __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list"]
38
40
 
39
- # time resolution
40
- data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
41
-
42
- # hourly data
43
- # dataset: GLBv0.08, GLBu0.08, GLBy0.08
44
- data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
45
-
46
- # version
47
- # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
48
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
49
- # version of GLBu0.08: 93.0
50
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
51
- # version of GLBy0.08: 93.0
52
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
53
- # version of ESPC_D: V02
54
- data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
55
-
56
- # info details
57
- # time range
58
- # GLBv0.08
59
- # Submitting an out-of-range time on the web page returns the dataset's actual time range, which was used to correct the ranges below
60
- # So far only the GLBv0.08 93.0 time range has been corrected, down to the hour
61
- # For the other datasets the hours default to 00 at the start and 21 at the end for now
62
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
63
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
64
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
65
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
66
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
67
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
68
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
69
- # GLBu0.08
70
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
71
- # GLBy0.08
72
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
73
- # ESPC-D
74
- data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
75
-
76
- # variable
77
- variable_info = {
78
- "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
79
- "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
80
- "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
81
- "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
82
- "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
83
- "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
84
- "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
85
- "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
86
- "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
87
- }
88
-
89
- # classification method
90
- # year_different: the data of different years is stored in different files
91
- # same_path: the data of different years is stored in the same file
92
- # var_different: the data of different variables is stored in different files
93
- # var_year_different: the data of different variables and years is stored in different files
94
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
95
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
96
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
97
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
98
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
99
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
100
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
101
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
102
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
103
- data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
104
-
105
- # download info
106
- # base url
107
- # GLBv0.08 53.X
108
- url_53x = {}
109
- for y_53x in range(1994, 2016):
110
- # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
111
- url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
112
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
113
- # GLBv0.08 56.3
114
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
115
- # GLBv0.08 57.2
116
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
117
- # GLBv0.08 92.8
118
- url_928 = {
119
- "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
120
- "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
121
- "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
122
- }
123
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
124
- # GLBv0.08 57.7
125
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
126
- # GLBv0.08 92.9
127
- url_929 = {
128
- "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
129
- "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
130
- "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
131
- }
132
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
133
- # GLBv0.08 93.0
134
- url_930_v = {
135
- "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
136
- "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
137
- "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
138
- }
139
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
140
- # GLBu0.08 93.0
141
- url_930_u = {
142
- "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
143
- "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
144
- "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
145
- }
146
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
147
- # GLBy0.08 93.0
148
- uv3z_930_y = {}
149
- ts3z_930_y = {}
150
- ssh_930_y = {}
151
- for y_930_y in range(2018, 2025):
152
- uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
153
- ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
154
- ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
155
- # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
156
- url_930_y = {
157
- "uv3z": uv3z_930_y,
158
- "ts3z": ts3z_930_y,
159
- "ssh": ssh_930_y,
160
- }
161
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
162
- # ESPC-D-V02
163
- u3z_espc_d_v02_y = {}
164
- v3z_espc_d_v02_y = {}
165
- t3z_espc_d_v02_y = {}
166
- s3z_espc_d_v02_y = {}
167
- ssh_espc_d_v02_y = {}
168
- for y_espc_d_v02 in range(2024, 2030):
169
- u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
170
- v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
171
- t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
172
- s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
173
- ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
174
- url_espc_d_v02_y = {
175
- "u3z": u3z_espc_d_v02_y,
176
- "v3z": v3z_espc_d_v02_y,
177
- "t3z": t3z_espc_d_v02_y,
178
- "s3z": s3z_espc_d_v02_y,
179
- "ssh": ssh_espc_d_v02_y,
180
- }
181
- data_info['hourly']['dataset']['ESPC_D']['version']['V02']['url'] = url_espc_d_v02_y
182
-
183
- var_group = {
184
- "uv3z": ["u", "v", "u_b", "v_b"],
185
- "ts3z": ["temp", "salt", "temp_b", "salt_b"],
186
- "ssh": ["ssh"],
187
- }
188
- single_var_group = {
189
- "u3z": ["u"],
190
- "v3z": ["v"],
191
- "t3z": ["temp"],
192
- "s3z": ["salt"],
193
- "ssh": ["ssh"],
194
- }
41
+
42
+ def _get_initial_data():
43
+ global variable_info, data_info, var_group, single_var_group
44
+ # ----------------------------------------------
45
+ # variable
46
+ variable_info = {
47
+ "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
48
+ "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
49
+ "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
50
+ "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
51
+ "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
52
+ "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
53
+ "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
54
+ "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
55
+ "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
56
+ }
57
+ # ----------------------------------------------
58
+ # time resolution
59
+ data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
60
+
61
+ # hourly data
62
+ # dataset: GLBv0.08, GLBu0.08, GLBy0.08
63
+ data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
64
+
65
+ # version
66
+ # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
67
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
68
+ # version of GLBu0.08: 93.0
69
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
70
+ # version of GLBy0.08: 93.0
71
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
72
+ # version of ESPC_D: V02
73
+ data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
74
+
75
+ # info details
76
+ # time range
77
+ # GLBv0.08
78
+ # Submitting an out-of-range time on the web page returns the dataset's actual time range, which corrects the ranges below
79
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
80
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
81
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
82
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
83
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
84
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
85
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
86
+ # GLBu0.08
87
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
88
+ # GLBy0.08
89
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
90
+ # ESPC-D
91
+ data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
92
+
93
+ # classification method
94
+ # year_different: the data of different years is stored in different files
95
+ # same_path: the data of different years is stored in the same file
96
+ # var_different: the data of different variables is stored in different files
97
+ # var_year_different: the data of different variables and years is stored in different files
98
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
99
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
100
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
101
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
102
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
103
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
104
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
105
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
106
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
107
+ data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
108
+
109
+ # download info
110
+ # base url
111
+ # GLBv0.08 53.X
112
+ url_53x = {}
113
+ for y_53x in range(1994, 2016):
114
+ # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
115
+ url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
116
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
117
+ # GLBv0.08 56.3
118
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
119
+ # GLBv0.08 57.2
120
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
121
+ # GLBv0.08 92.8
122
+ url_928 = {
123
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
124
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
125
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
126
+ }
127
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
128
+ # GLBv0.08 57.7
129
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
130
+ # GLBv0.08 92.9
131
+ url_929 = {
132
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
133
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
134
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
135
+ }
136
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
137
+ # GLBv0.08 93.0
138
+ url_930_v = {
139
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
140
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
141
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
142
+ }
143
+ data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
144
+ # GLBu0.08 93.0
145
+ url_930_u = {
146
+ "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
147
+ "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
148
+ "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
149
+ }
150
+ data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
151
+ # GLBy0.08 93.0
152
+ uv3z_930_y = {}
153
+ ts3z_930_y = {}
154
+ ssh_930_y = {}
155
+ for y_930_y in range(2018, 2025):
156
+ uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
157
+ ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
158
+ ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
159
+ # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
160
+ url_930_y = {
161
+ "uv3z": uv3z_930_y,
162
+ "ts3z": ts3z_930_y,
163
+ "ssh": ssh_930_y,
164
+ }
165
+ data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
166
+ # ESPC-D-V02
167
+ u3z_espc_d_v02_y = {}
168
+ v3z_espc_d_v02_y = {}
169
+ t3z_espc_d_v02_y = {}
170
+ s3z_espc_d_v02_y = {}
171
+ ssh_espc_d_v02_y = {}
172
+ for y_espc_d_v02 in range(2024, 2030):
173
+ u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
174
+ v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
175
+ t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
176
+ s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
177
+ ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
178
+ url_espc_d_v02_y = {
179
+ "u3z": u3z_espc_d_v02_y,
180
+ "v3z": v3z_espc_d_v02_y,
181
+ "t3z": t3z_espc_d_v02_y,
182
+ "s3z": s3z_espc_d_v02_y,
183
+ "ssh": ssh_espc_d_v02_y,
184
+ }
185
+ data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["url"] = url_espc_d_v02_y
186
+ # ----------------------------------------------
187
+ var_group = {
188
+ "uv3z": ["u", "v", "u_b", "v_b"],
189
+ "ts3z": ["temp", "salt", "temp_b", "salt_b"],
190
+ "ssh": ["ssh"],
191
+ }
192
+ # ----------------------------------------------
193
+ single_var_group = {
194
+ "u3z": ["u"],
195
+ "v3z": ["v"],
196
+ "t3z": ["temp"],
197
+ "s3z": ["salt"],
198
+ "ssh": ["ssh"],
199
+ }
200
+
201
+ return variable_info, data_info, var_group, single_var_group
195
202
 
196
203
 
197
204
  def draw_time_range(pic_save_folder=None):
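
The hunk above is mostly structural: the module-level variable_info/data_info/var_group/single_var_group tables move into _get_initial_data(), which rebuilds them and publishes them as globals each time the public entry point runs. A minimal sketch of that lazy-initialization pattern, with simplified stand-in tables rather than the package's full data:

    def _get_initial_data():
        global variable_info, data_info
        # Rebuilt on every call, so stale state cannot leak between downloads.
        variable_info = {"u": {"var_name": "water_u"}}
        data_info = {"hourly": {"dataset": {}}}
        return variable_info, data_info

    def download(var):
        _get_initial_data()  # the entry point populates the globals before any lookup
        return variable_info[var]["var_name"]

    print(download("u"))  # -> water_u
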
@@ -299,14 +306,14 @@ def get_time_list(time_s, time_e, delta, interval_type="hour"):
299
306
  return dt_list
300
307
 
301
308
 
302
- def transform_time(time_str):
309
+ def _transform_time(time_str):
303
310
  # old_time = '2023080203'
304
311
  # time_new = '2023-08-02T03%3A00%3A00Z'
305
312
  time_new = f"{time_str[:4]}-{time_str[4:6]}-{time_str[6:8]}T{time_str[8:10]}%3A00%3A00Z"
306
313
  return time_new
307
314
 
308
315
 
309
- def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
316
+ def _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_str_end=None, mode="single_depth", depth=None, level_num=None):
310
317
  query_dict = {
311
318
  "var": variable_info[var]["var_name"],
312
319
  "north": lat_max,
@@ -325,11 +332,11 @@ def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_
325
332
  }
326
333
 
327
334
  if time_str_end is not None:
328
- query_dict["time_start"] = transform_time(time_str_ymdh)
329
- query_dict["time_end"] = transform_time(time_str_end)
335
+ query_dict["time_start"] = _transform_time(time_str_ymdh)
336
+ query_dict["time_end"] = _transform_time(time_str_end)
330
337
  query_dict["timeStride"] = 1
331
338
  else:
332
- query_dict["time"] = transform_time(time_str_ymdh)
339
+ query_dict["time"] = _transform_time(time_str_ymdh)
333
340
 
334
341
  def get_nearest_level_index(depth):
335
342
  level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 125.0, 150.0, 200.0, 250.0, 300.0, 350.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, 1250.0, 1500.0, 2000.0, 2500.0, 3000.0, 4000.0, 5000]
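
get_nearest_level_index snaps a requested depth onto the fixed HYCOM level table shown above. An equivalent one-liner over an abbreviated copy of that table:

    def nearest_level_index(depth: float) -> int:
        level_depth = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0]  # abbreviated; the full table has 40 levels
        # Index of the level whose depth is closest to the request.
        return min(range(len(level_depth)), key=lambda i: abs(level_depth[i] - depth))

    print(nearest_level_index(4.9))  # -> 2, since 4.0 is the closest level
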
@@ -354,7 +361,7 @@ def get_query_dict(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh, time_
354
361
  return query_dict
355
362
 
356
363
 
357
- def check_time_in_dataset_and_version(time_input, time_end=None):
364
+ def _check_time_in_dataset_and_version(time_input, time_end=None):
358
365
  # Decide whether we are handling a single time point or a time range
359
366
  is_single_time = time_end is None
360
367
 
@@ -411,8 +418,8 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
411
418
  if is_single_time:
412
419
  return True
413
420
  else:
414
- base_url_s = get_base_url(d_list[0], v_list[0], "u", str(time_start))
415
- base_url_e = get_base_url(d_list[0], v_list[0], "u", str(time_end))
421
+ base_url_s = _get_base_url(d_list[0], v_list[0], "u", str(time_start))
422
+ base_url_e = _get_base_url(d_list[0], v_list[0], "u", str(time_end))
416
423
  if base_url_s == base_url_e:
417
424
  return True
418
425
  else:
@@ -423,7 +430,7 @@ def check_time_in_dataset_and_version(time_input, time_end=None):
423
430
  return False
424
431
 
425
432
 
426
- def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
433
+ def _ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time_input, time_end=None):
427
434
  # Pad the time string to the full format according to its length
428
435
  if len(str(time_input)) == 8:
429
436
  time_input = str(time_input) + "00"
@@ -462,7 +469,7 @@ def ensure_time_in_specific_dataset_and_version(dataset_name, version_name, time
462
469
  return False
463
470
 
464
471
 
465
- def direct_choose_dataset_and_version(time_input, time_end=None):
472
+ def _direct_choose_dataset_and_version(time_input, time_end=None):
466
473
  # Assume data_info is a dict holding the dataset and version information
467
474
  # Example structure: data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range']
468
475
 
@@ -501,7 +508,7 @@ def direct_choose_dataset_and_version(time_input, time_end=None):
501
508
  return dataset_name_out, version_name_out
502
509
 
503
510
 
504
- def get_base_url(dataset_name, version_name, var, ymdh_str):
511
+ def _get_base_url(dataset_name, version_name, var, ymdh_str):
505
512
  year_str = int(ymdh_str[:4])
506
513
  url_dict = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["url"]
507
514
  classification_method = data_info["hourly"]["dataset"][dataset_name]["version"][version_name]["classification"]
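
_get_base_url picks a URL out of the nested tables according to the dataset's classification method. A hedged sketch of that dispatch with simplified arguments (the real function also maps the variable onto its uv3z/ts3z/ssh group first):

    def base_url_for(url_dict, classification, group_key, year_str):
        # Mirrors the classification methods documented above.
        if classification == "same_path":
            return url_dict                        # one URL for every year and variable
        if classification == "year_different":
            return url_dict[year_str]              # one URL per year
        if classification == "var_different":
            return url_dict[group_key]             # one URL per variable group
        return url_dict[group_key][year_str]       # var_year_different / single_var_year_different
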
@@ -542,199 +549,109 @@ def get_base_url(dataset_name, version_name, var, ymdh_str):
542
549
  return base_url
543
550
 
544
551
 
545
- def get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
546
- base_url = get_base_url(dataset_name, version_name, var, ymdh_str)
552
+ def _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict):
553
+ base_url = _get_base_url(dataset_name, version_name, var, ymdh_str)
547
554
  if isinstance(query_dict["var"], str):
548
555
  query_dict["var"] = [query_dict["var"]]
549
556
  target_url = base_url + "&".join(f"var={var}" for var in query_dict["var"]) + "&" + "&".join(f"{key}={value}" for key, value in query_dict.items() if key != "var")
550
557
  return target_url
551
558
 
552
559
 
553
- def clear_existing_file(file_full_path):
560
+ def _clear_existing_file(file_full_path):
554
561
  if os.path.exists(file_full_path):
555
562
  os.remove(file_full_path)
556
563
  print(f"{file_full_path} has been removed")
557
564
 
558
565
 
559
- def find_file(parent_path, fname, mode="path"):
560
- """
561
- description:
562
- param {*} parent_path: The parent path where the files are located
563
- param {*} fname: The file name pattern to search for
564
- param {*} mode: 'path' to return the full path of the files, 'file' to return only the file names
565
- return {*} A list of file paths or file names if files are found, None otherwise
566
- """
567
-
568
- def natural_sort_key(s):
569
- """生成一个用于自然排序的键"""
570
- return [int(text) if text.isdigit() else text.lower() for text in re.split("([0-9]+)", s)]
571
-
572
- # Join parent_path and fname into the full search pattern
573
- search_pattern = os.path.join(str(parent_path), fname)
574
-
575
- # Use the glob module to find all matching files
576
- matched_files = glob.glob(search_pattern)
577
-
578
- # If no files are found, return None
579
- if not matched_files:
580
- return None
581
-
582
- # Replaces the natsorted call used in find_files
583
- matched_files = sorted(matched_files, key=natural_sort_key)
584
-
585
- # Decide what to return based on the mode parameter
586
- if mode == "file":
587
- # Return only the file names
588
- result = [os.path.basename(file) for file in matched_files]
589
- else:  # defaults to 'path'
590
- # Return the absolute paths of the files
591
- result = [os.path.abspath(file) for file in matched_files]
592
-
593
- return result
594
-
595
-
596
- def file_size(file_path, unit="KB"):
597
- # Check whether the file exists
598
- if not os.path.exists(file_path):
599
- return "File does not exist"
600
-
601
- # Get the file size in bytes
602
- file_size = os.path.getsize(file_path)
603
-
604
- # Unit conversion table
605
- unit_dict = {"PB": 1024**5, "TB": 1024**4, "GB": 1024**3, "MB": 1024**2, "KB": 1024}
606
-
607
- # Check whether the requested unit is valid
608
- if unit not in unit_dict:
609
- return "Invalid unit; please choose one of PB, TB, GB, MB, KB"
610
-
611
- # Convert the file size to the requested unit
612
- converted_size = file_size / unit_dict[unit]
613
-
614
- return converted_size
615
-
616
-
617
- # ** Compute the average size of the matching files under a folder
618
- def mean_size(parent_path, fname):
619
- flist = find_file(parent_path, fname)
620
- if flist:
621
- size_list = [file_size(f) for f in flist if file_size(f) != 0]
622
- else:
623
- size_list = []
624
- if size_list:
625
- min_size, max_size = min(size_list), max(size_list)
626
- mean_size = sum(size_list) / len(size_list)
627
- else:
628
- mean_size, min_size, max_size = 0, 0, 0
629
- return mean_size, min_size, max_size
630
-
631
-
632
- def check_existing_file(file_full_path, min_size):
566
+ def _check_existing_file(file_full_path, avg_size):
633
567
  if os.path.exists(file_full_path):
634
568
  print(f"[bold #FFA54F]{file_full_path} exists")
635
569
  fsize = file_size(file_full_path)
636
- if min_size:
637
- if fsize < min_size:
638
- print(f"[bold #FFA54F]{file_full_path} ({fsize:.2f} KB) may be incomplete")
639
- # clear_existing_file(file_full_path)
640
- return False
641
- else:
570
+ delta_size_ratio = (fsize - avg_size) / avg_size
571
+ if abs(delta_size_ratio) > 0.025:
572
+ if check_nc(file_full_path):
573
+ # print(f"File size is abnormal but can be opened normally, file size: {fsize:.2f} KB")
642
574
  return True
643
- if fsize < 5:
644
- print(f"[bold #FFA54F]{file_full_path} ({fsize:.2f} KB) may be incomplete")
645
- # clear_existing_file(file_full_path)
646
- return False
575
+ else:
576
+ print(f"File size is abnormal and cannot be opened, {file_full_path}: {fsize:.2f} KB")
577
+ return False
647
578
  else:
648
579
  return True
649
580
  else:
650
- # print(f'{file_full_path} does not exist')
651
581
  return False
652
582
 
653
583
 
654
- def get_ua():
655
- current_dir = os.path.dirname(os.path.abspath(__file__))
656
- ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
657
-
658
- with open(ua_file_txt, "r") as f:
659
- ua_list = f.readlines()
660
- # Strip newlines and drop blank lines
661
- ua_list = [line.strip() for line in ua_list if line.strip()]
662
-
663
- # if current_platform == 'Linux':
664
- # ua_list = [line for line in ua_list if 'Linux' in line]
665
-
666
- return random.choice(ua_list)
667
-
584
+ def _get_mean_size30(store_path, same_file):
585
+ if same_file not in fsize_dict.keys():
586
+ # print(f'Same file name: {same_file}')
587
+ fsize_dict[same_file] = {"size": 0, "count": 0}
668
588
 
669
- def get_proxy_file():
670
- # Get the absolute path of the current script
671
- script_dir = os.path.dirname(os.path.abspath(__file__))
672
- # Build the absolute path of ip.txt
673
- ip_file_txt = os.path.join(script_dir, "ip.txt")
674
- with open(ip_file_txt, "r") as f:
675
- ips = f.readlines()
676
- ip_list = []
677
- for ip in ips:
678
- ip_list.append(ip.strip())
679
- choose_ip = random.choice(ip_list)
680
- proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
681
- # print(f'Using proxy: {proxies}')
682
- return proxies
683
-
684
-
685
- def scrape_and_categorize_proxies(choose_protocol="http"):
686
- url = "https://topproxylinks.com/"
687
- # Send an HTTP request to fetch the page content
688
- response = requests.get(url)
689
- # Parse the page with BeautifulSoup
690
- soup = BeautifulSoup(response.text, "html.parser")
691
-
692
- # Initialize a dict to store the proxies for each protocol
693
- proxies_dict = {"http": [], "socks4": [], "socks5": []}
589
+ if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
590
+ # Update the minimum size over the first 30 files; after that it is taken as representative of all files and no longer updated, to save time
591
+ fsize_mean = mean_size(store_path, same_file, max_num=30)
592
+ set_min_size = fsize_mean * 0.95
593
+ fsize_dict[same_file]["size"] = set_min_size
594
+ fsize_dict[same_file]["count"] += 1
595
+ else:
596
+ set_min_size = fsize_dict[same_file]["size"]
597
+ return set_min_size
694
598
 
695
- # Find all the rows in the table
696
- tbody = soup.find("tbody")
697
599
 
698
- if tbody:
699
- for row in tbody.find_all("tr"):
700
- # Extract the protocol, proxy, and country cells
701
- cells = row.find_all("td")
702
- protocol = cells[0].text.strip().lower()
703
- proxy = cells[1].text.strip()
600
+ def _get_mean_size_move(same_file, current_file):
601
+ # Acquire the lock
602
+ with fsize_dict_lock: # global lock, so only one thread can access the dict at a time
603
+ # Initialize the entry in the dict if the file is not in it yet
604
+ if same_file not in fsize_dict.keys():
605
+ fsize_dict[same_file] = {"size_list": [], "mean_size": 1.0}
704
606
 
705
- # Store the proxies grouped by protocol
706
- if protocol in proxies_dict:
707
- proxies_dict[protocol].append(proxy)
607
+ tolerance_ratio = 0.025 # tolerated deviation ratio
608
+ current_file_size = file_size(current_file)
708
609
 
709
- if choose_protocol in proxies_dict:
710
- proxies_list = proxies_dict[choose_protocol]
711
- else:
712
- proxies_list = proxies_dict["http"]
610
+ # If the list is non-empty, compute the mean; otherwise keep it at 1
611
+ if fsize_dict[same_file]["size_list"]:
612
+ fsize_dict[same_file]["mean_size"] = sum(fsize_dict[same_file]["size_list"]) / len(fsize_dict[same_file]["size_list"])
613
+ fsize_dict[same_file]["mean_size"] = max(fsize_dict[same_file]["mean_size"], 1.0)
614
+ else:
615
+ fsize_dict[same_file]["mean_size"] = 1.0
713
616
 
714
- return proxies_list
617
+ size_difference_ratio = (current_file_size - fsize_dict[same_file]["mean_size"]) / fsize_dict[same_file]["mean_size"]
715
618
 
619
+ if abs(size_difference_ratio) > tolerance_ratio:
620
+ if check_nc(current_file):
621
+ # print(f"File size is abnormal but can be opened normally, file size: {current_file_size:.2f} KB")
622
+ # The file can be opened normally, but its size is abnormal; keep the current file size
623
+ fsize_dict[same_file]["size_list"] = [current_file_size]
624
+ fsize_dict[same_file]["mean_size"] = current_file_size
625
+ else:
626
+ _clear_existing_file(current_file)
627
+ print(f"File size is abnormal, may need to be downloaded again, file size: {current_file_size:.2f} KB")
628
+ else:
629
+ # Append the current file size to the list
630
+ fsize_dict[same_file]["size_list"].append(current_file_size)
716
631
 
717
- def get_proxy():
718
- ip_list = scrape_and_categorize_proxies(choose_protocol="http")
719
- choose_ip = random.choice(ip_list)
720
- proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
721
- print(f"Using proxy: {proxies}")
722
- return proxies
632
+ # Return the adjusted mean; by design this is the mean from before the new value was added
633
+ return fsize_dict[same_file]["mean_size"]
723
634
 
724
635
 
725
- def download_file(target_url, store_path, file_name, check=False):
636
+ def _download_file(target_url, store_path, file_name, check=False):
726
637
  # Check if the file exists
727
638
  fname = Path(store_path) / file_name
728
639
  file_name_split = file_name.split("_")
729
- same_file = f'{file_name_split[0]}_{file_name_split[1]}*nc'
730
- fsize_mean, fsize_min, fsize_max = mean_size(store_path, same_file)
731
- set_min_size = fsize_mean - 0.5 * (fsize_max - fsize_min)
732
- set_min_size = set_min_size if set_min_size > 0 else fsize_min
640
+ file_name_split = file_name_split[:-1]
641
+ # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
642
+ same_file = "_".join(file_name_split) + "*nc"
643
+
733
644
  if check:
734
- if check_existing_file(fname, set_min_size*0.8):
645
+ if same_file not in fsize_dict.keys(): # check the first file on its own, since there is no size to compare against yet
646
+ check_nc(fname, if_delete=True)
647
+
648
+ # set_min_size = _get_mean_size30(store_path, same_file) # old approach: average over the first 30 files only; it cannot adapt if sizes change
649
+ get_mean_size = _get_mean_size_move(same_file, fname)
650
+
651
+ if _check_existing_file(fname, get_mean_size):
735
652
  count_dict["skip"] += 1
736
653
  return
737
- clear_existing_file(fname)
654
+ _clear_existing_file(fname)
738
655
 
739
656
  # -----------------------------------------------
740
657
  print(f"[bold #f0f6d0]Requesting {file_name}...")
@@ -744,13 +661,11 @@ def download_file(target_url, store_path, file_name, check=False):
744
661
  request_times = 0
745
662
 
746
663
  def calculate_wait_time(time_str, target_url):
747
- import re
748
-
749
664
  # Regular expression matching times in YYYYMMDDHH format
750
665
  time_pattern = r"\d{10}"
751
666
 
752
667
  # Two example strings
753
- # str1 = 'HYCOM_water_u_2018010100_2018010112.nc'
668
+ # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
754
669
  # str2 = 'HYCOM_water_u_2018010100.nc'
755
670
 
756
671
  # Find the times using the regular expression
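
calculate_wait_time uses the YYYYMMDDHH stamps embedded in the file name to estimate how many 3-hour records a request covers (one stamp means a single record, two stamps mean a range) and scales the timeout accordingly. A sketch of just that extraction step, under the file-name formats shown above:

    import re
    from datetime import datetime

    def num_records(file_name: str) -> int:
        stamps = re.findall(r"\d{10}", file_name)  # YYYYMMDDHH stamps in the name
        if len(stamps) == 2:
            t0, t1 = (datetime.strptime(s, "%Y%m%d%H") for s in stamps)
            return int((t1 - t0).total_seconds() // 10800) + 1  # 10800 s = 3 h
        return 1

    print(num_records("HYCOM_water_u_2018010100-2018010112.nc"))  # -> 5
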
@@ -796,13 +711,8 @@ def download_file(target_url, store_path, file_name, check=False):
796
711
  with open(filename, 'wb') as f:
797
712
  f.write(response.content) """
798
713
 
799
- if find_proxy:
800
- proxies = get_proxy()
801
- response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
802
- else:
803
- response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout)) # enable streaming
714
+ response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout)) # enable streaming
804
715
  response.raise_for_status() # 如果请求返回的不是200,将抛出HTTPError异常
805
-
806
716
  # 保存文件
807
717
  with open(fname, "wb") as f:
808
718
  print(f"[bold #96cbd7]Downloading {file_name}...")
@@ -834,7 +744,7 @@ def download_file(target_url, store_path, file_name, check=False):
834
744
  request_times += 1
835
745
 
836
746
 
837
- def check_hour_is_valid(ymdh_str):
747
+ def _check_hour_is_valid(ymdh_str):
838
748
  # hour should be 00, 03, 06, 09, 12, 15, 18, 21
839
749
  hh = int(str(ymdh_str[-2:]))
840
750
  if hh in [0, 3, 6, 9, 12, 15, 18, 21]:
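
_check_hour_is_valid accepts only the 3-hourly analysis hours 00 through 21; since those are exactly the hours divisible by three, the membership test is equivalent to a modulo check:

    def check_hour_is_valid(ymdh_str: str) -> bool:
        # 00, 03, 06, 09, 12, 15, 18, 21 are precisely the hours divisible by 3.
        return int(ymdh_str[-2:]) % 3 == 0

    assert check_hour_is_valid("2024101012") and not check_hour_is_valid("2024101013")
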
@@ -843,9 +753,9 @@ def check_hour_is_valid(ymdh_str):
843
753
  return False
844
754
 
845
755
 
846
- def check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
756
+ def _check_dataset_version(dataset_name, version_name, download_time, download_time_end=None):
847
757
  if dataset_name is not None and version_name is not None:
848
- just_ensure = ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
758
+ just_ensure = _ensure_time_in_specific_dataset_and_version(dataset_name, version_name, download_time, download_time_end)
849
759
  if just_ensure:
850
760
  return dataset_name, version_name
851
761
  else:
@@ -858,7 +768,7 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
858
768
  download_time_str = download_time_str + "00"
859
769
 
860
770
  # Check whether the hour is valid (when needed)
861
- if download_time_end is None and not check_hour_is_valid(download_time_str):
771
+ if download_time_end is None and not _check_hour_is_valid(download_time_str):
862
772
  print("Please ensure the hour is 00, 03, 06, 09, 12, 15, 18, 21")
863
773
  raise ValueError("The hour is invalid")
864
774
 
@@ -866,18 +776,18 @@ def check_dataset_version(dataset_name, version_name, download_time, download_ti
866
776
  if download_time_end is not None:
867
777
  if len(str(download_time_end)) == 8:
868
778
  download_time_end = str(download_time_end) + "21"
869
- have_data = check_time_in_dataset_and_version(download_time_str, download_time_end)
779
+ have_data = _check_time_in_dataset_and_version(download_time_str, download_time_end)
870
780
  if have_data:
871
- return direct_choose_dataset_and_version(download_time_str, download_time_end)
781
+ return _direct_choose_dataset_and_version(download_time_str, download_time_end)
872
782
  else:
873
- have_data = check_time_in_dataset_and_version(download_time_str)
783
+ have_data = _check_time_in_dataset_and_version(download_time_str)
874
784
  if have_data:
875
- return direct_choose_dataset_and_version(download_time_str)
785
+ return _direct_choose_dataset_and_version(download_time_str)
876
786
 
877
787
  return None, None
878
788
 
879
789
 
880
- def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
790
+ def _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end=None):
881
791
  # year_str = str(download_time)[:4]
882
792
  ymdh_str = str(download_time)
883
793
  if depth is not None and level_num is not None:
@@ -893,19 +803,19 @@ def get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max
893
803
  else:
894
804
  # print("Full depth or full level data will be downloaded...")
895
805
  which_mode = "full"
896
- query_dict = get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
897
- submit_url = get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
806
+ query_dict = _get_query_dict(var, lon_min, lon_max, lat_min, lat_max, download_time, download_time_end, which_mode, depth, level_num)
807
+ submit_url = _get_submit_url(dataset_name, version_name, var, ymdh_str, query_dict)
898
808
  return submit_url
899
809
 
900
810
 
901
- def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
811
+ def _prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, download_time="2024083100", download_time_end=None, depth=None, level_num=None, store_path=None, dataset_name=None, version_name=None, check=False):
902
812
  print("[bold #ecdbfe]-" * 160)
903
813
  download_time = str(download_time)
904
814
  if download_time_end is not None:
905
815
  download_time_end = str(download_time_end)
906
- dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time, download_time_end)
816
+ dataset_name, version_name = _check_dataset_version(dataset_name, version_name, download_time, download_time_end)
907
817
  else:
908
- dataset_name, version_name = check_dataset_version(dataset_name, version_name, download_time)
818
+ dataset_name, version_name = _check_dataset_version(dataset_name, version_name, download_time)
909
819
  if dataset_name is None and version_name is None:
910
820
  count_dict["no_data"] += 1
911
821
  if download_time_end is not None:
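
_get_submit_url appends one var= field per variable and then the remaining query parameters to the base URL. The same shape can be produced with urllib.parse.urlencode(..., doseq=True); note the package joins strings manually instead, because its time values are already percent-encoded and must not be encoded twice:

    from urllib.parse import urlencode

    def submit_url(base_url: str, query_dict: dict) -> str:
        # base_url already ends with '?', as in the url tables above.
        return base_url + urlencode(query_dict, doseq=True)

    print(submit_url(
        "https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?",
        {"var": ["water_u", "water_v"], "north": 45, "south": 15},
    ))  # ...expt_56.3?var=water_u&var=water_v&north=45&south=15
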
@@ -920,11 +830,11 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
920
830
  if isinstance(var, list):
921
831
  if len(var) == 1:
922
832
  var = var[0]
923
- submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
833
+ submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
924
834
  file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
925
835
  if download_time_end is not None:
926
- file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}_{download_time_end}.nc"
927
- download_file(submit_url, store_path, file_name, check)
836
+ file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc" # 这里时间不能用下划线,不然后续处理查找同一变量文件会出问题
837
+ _download_file(submit_url, store_path, file_name, check)
928
838
  else:
929
839
  if download_time < "2024081012":
930
840
  varlist = [_ for _ in var]
@@ -937,7 +847,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
937
847
  continue
938
848
 
939
849
  var = current_group[0]
940
- submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
850
+ submit_url = _get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
941
851
  file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
942
852
  old_str = f'var={variable_info[var]["var_name"]}'
943
853
  new_str = f'var={variable_info[var]["var_name"]}'
@@ -948,18 +858,18 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
948
858
  # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
949
859
  file_name = f"HYCOM_{key}_{download_time}.nc"
950
860
  if download_time_end is not None:
951
- file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
952
- download_file(submit_url, store_path, file_name, check)
861
+ file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc" # 这里时间不能用下划线,不然后续处理查找同一变量文件会出问题
862
+ _download_file(submit_url, store_path, file_name, check)
953
863
  else:
954
864
  for v in var:
955
- submit_url = get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
865
+ submit_url = _get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
956
866
  file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
957
867
  if download_time_end is not None:
958
- file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}_{download_time_end}.nc"
959
- download_file(submit_url, store_path, file_name, check)
868
+ file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
869
+ _download_file(submit_url, store_path, file_name, check)
960
870
 
961
871
 
962
- def convert_full_name_to_short_name(full_name):
872
+ def _convert_full_name_to_short_name(full_name):
963
873
  for var, info in variable_info.items():
964
874
  if full_name == info["var_name"] or full_name == info["standard_name"] or full_name == var:
965
875
  return var
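
_convert_full_name_to_short_name normalizes any of the three accepted spellings (short key, HYCOM variable name, CF standard name) to the short key, returning False when nothing matches. For example, once _get_initial_data() has populated variable_info, each of these calls should print 'temp':

    for name in ("temp", "water_temp", "sea_water_potential_temperature"):
        print(_convert_full_name_to_short_name(name))  # -> temp, three times
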
@@ -969,7 +879,7 @@ def convert_full_name_to_short_name(full_name):
969
879
  return False
970
880
 
971
881
 
972
- def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
882
+ def _download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check):
973
883
  """
974
884
  # Parallel download task
975
885
  # This wrapper exists for parallel downloading; it is required, because calling direct_download directly in parallel causes problems
@@ -980,10 +890,10 @@ def download_task(var, time_str, time_str_end, lon_min, lon_max, lat_min, lat_ma
980
890
  Therefore, even when multiple tasks run at the same time, their data will not get mixed up.
981
891
  """
982
892
 
983
- prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
893
+ _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
984
894
 
985
895
 
986
- def done_callback(future, progress, task, total, counter_lock):
896
+ def _done_callback(future, progress, task, total, counter_lock):
987
897
  """
988
898
  # Callback function for the parallel download tasks
989
899
  # This callback exists for parallel downloading; it is required, because calling direct_download directly in parallel causes problems
@@ -999,7 +909,7 @@ def done_callback(future, progress, task, total, counter_lock):
999
909
  progress.update(task, advance=1, description=f"[cyan]Downloading... {parallel_counter}/{total}")
1000
910
 
1001
911
 
1002
- def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
912
+ def _download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
1003
913
  """
1004
914
  Description:
1005
915
  Download the data of single time or a series of time
@@ -1028,7 +938,7 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
1028
938
  parallel_counter = 0
1029
939
  counter_lock = Lock() # a lock to make the counter thread-safe
1030
940
  if ymdh_time_s == ymdh_time_e:
1031
- prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, check)
941
+ _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, None, depth, level, store_path, dataset_name, version_name, check)
1032
942
  elif int(ymdh_time_s) < int(ymdh_time_e):
1033
943
  print("Downloading a series of files...")
1034
944
  time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3, "hour")
@@ -1038,16 +948,16 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
1038
948
  if num_workers is None or num_workers <= 1:
1039
949
  # Serial mode
1040
950
  for i, time_str in enumerate(time_list):
1041
- prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
951
+ _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, None, depth, level, store_path, dataset_name, version_name, check)
1042
952
  progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")
1043
953
  else:
1044
954
  # Parallel mode
1045
955
  with ThreadPoolExecutor(max_workers=num_workers) as executor:
1046
- futures = [executor.submit(download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
956
+ futures = [executor.submit(_download_task, var, time_str, None, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
1047
957
  """ for i, future in enumerate(futures):
1048
958
  future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{len(time_list)}")) """
1049
959
  for feature in as_completed(futures):
1050
- done_callback(feature, progress, task, len(time_list), counter_lock)
960
+ _done_callback(feature, progress, task, len(time_list), counter_lock)
1051
961
  else:
1052
962
  new_time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3 * ftimes, "hour")
1053
963
  total_num = len(new_time_list)
@@ -1056,16 +966,16 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
1056
966
  for i, time_str in enumerate(new_time_list):
1057
967
  time_str_end_index = int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))
1058
968
  time_str_end = time_list[time_str_end_index]
1059
- prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
969
+ _prepare_url_to_download(var, lon_min, lon_max, lat_min, lat_max, time_str, time_str_end, depth, level, store_path, dataset_name, version_name, check)
1060
970
  progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")
1061
971
  else:
1062
972
  # Parallel mode
1063
973
  with ThreadPoolExecutor(max_workers=num_workers) as executor:
1064
- futures = [executor.submit(download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
974
+ futures = [executor.submit(_download_task, var, new_time_list[i], time_list[int(min(len(time_list) - 1, int(i * ftimes + ftimes - 1)))], lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for i in range(total_num)]
1065
975
  """ for i, future in enumerate(futures):
1066
976
  future.add_done_callback(lambda _: progress.update(task, advance=1, description=f"[cyan]Downloading... {i+1}/{total_num}")) """
1067
977
  for feature in as_completed(futures):
1068
- done_callback(feature, progress, task, len(time_list), counter_lock)
978
+ _done_callback(feature, progress, task, len(time_list), counter_lock)
1069
979
  else:
1070
980
  print("Please ensure the time_s is no more than time_e")
1071
981
 
@@ -1073,28 +983,30 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
1073
983
  def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
1074
984
  """
1075
985
  Description:
1076
- Download the data of single time or a series of time
986
+ Download the data of single time or a series of time
1077
987
 
1078
988
  Parameters:
1079
- var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
1080
- time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
1081
- time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
1082
- lon_min: float, the minimum longitude, default is 0
1083
- lon_max: float, the maximum longitude, default is 359.92
1084
- lat_min: float, the minimum latitude, default is -80
1085
- lat_max: float, the maximum latitude, default is 90
1086
- depth: float, the depth, default is None; if you want the data at a single depth, set it (suggested range: [0, 5000])
1087
- level: int, the level number, default is None; if you want the data at a single level, set it (suggested range: [1, 40])
1088
- store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
1089
- dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
1090
- version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
1091
- num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
1092
- check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
1093
- ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
989
+ var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
990
+ time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
991
+ time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
992
+ lon_min: float, the minimum longitude, default is 0
993
+ lon_max: float, the maximum longitude, default is 359.92
994
+ lat_min: float, the minimum latitude, default is -80
995
+ lat_max: float, the maximum latitude, default is 90
996
+ depth: float, the depth, default is None; if you want the data at a single depth, set it (suggested range: [0, 5000])
997
+ level: int, the level number, default is None; if you want the data at a single level, set it (suggested range: [1, 40])
998
+ store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
999
+ dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
1000
+ version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
1001
+ num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
1002
+ check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
1003
+ ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
1094
1004
 
1095
1005
  Returns:
1096
- None
1006
+ None
1097
1007
  """
1008
+ _get_initial_data()
1009
+
1098
1010
  # Print info and resolve the dataset and version names
1099
1011
  if dataset_name is None and version_name is None:
1100
1012
  print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
@@ -1111,11 +1023,11 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
1111
1023
 
1112
1024
  if isinstance(var, list):
1113
1025
  if len(var) == 1:
1114
- var = convert_full_name_to_short_name(var[0])
1026
+ var = _convert_full_name_to_short_name(var[0])
1115
1027
  else:
1116
- var = [convert_full_name_to_short_name(v) for v in var]
1028
+ var = [_convert_full_name_to_short_name(v) for v in var]
1117
1029
  elif isinstance(var, str):
1118
- var = convert_full_name_to_short_name(var)
1030
+ var = _convert_full_name_to_short_name(var)
1119
1031
  else:
1120
1032
  raise ValueError("The var is invalid")
1121
1033
  if var is False:
@@ -1136,8 +1048,8 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
1136
1048
  os.makedirs(str(store_path), exist_ok=True)
1137
1049
 
1138
1050
  if num_workers is not None:
1139
- num_workers = max(min(num_workers, 10), 1)
1140
-
1051
+ num_workers = max(min(num_workers, 10), 1) # the maximum is not strictly limited for now; more threads can be opened when only re-checking
1052
+ # num_workers = int(max(num_workers, 1))
1141
1053
  time_s = str(time_s)
1142
1054
  if len(time_s) == 8:
1143
1055
  time_s += "00"
@@ -1154,10 +1066,13 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
1154
1066
  """ global current_platform
1155
1067
  current_platform = platform.system() """
1156
1068
 
1157
- global find_proxy
1158
- find_proxy = False
1069
+ global fsize_dict
1070
+ fsize_dict = {}
1071
+
1072
+ global fsize_dict_lock
1073
+ fsize_dict_lock = Lock()
1159
1074
 
1160
- download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
1075
+ _download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
1161
1076
 
1162
1077
  count_dict["total"] = count_dict["success"] + count_dict["fail"] + count_dict["skip"] + count_dict["no_data"]
1163
1078
 
@@ -1225,8 +1140,7 @@ def how_to_use():
1225
1140
 
1226
1141
 
1227
1142
  if __name__ == "__main__":
1228
- # help(hycom3h.download)
1229
- time_s, time_e = "2024081012", "2024081115"
1143
+ time_s, time_e = "2024101012", "2024101018"
1230
1144
  merge_name = f"{time_s}_{time_e}" # 合并后的文件名
1231
1145
  root_path = r"G:\Data\HYCOM\3hourly_test"
1232
1146
  location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
@@ -1256,7 +1170,7 @@ if __name__ == "__main__":
1256
1170
  download_switch, single_var = True, False
1257
1171
  combine_switch = False
1258
1172
  copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
1259
-
1173
+
1260
1174
  # draw_time_range(pic_save_folder=r'I:\Delete')
1261
1175
 
1262
1176
  if download_switch:
@@ -1288,6 +1202,6 @@ if __name__ == "__main__":
1288
1202
  merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
1289
1203
  if combine_switch:
1290
1204
  # var_name here must be the official variable name; abbreviations no longer work
1291
- merge5nc(file_list, var_name, 'time', merge_path_name)
1205
+ merge(file_list, var_name, 'time', merge_path_name)
1292
1206
  if copy_switch:
1293
1207
  copy_file(merge_path_name, copy_dir) """