oafuncs 0.0.88__py2.py3-none-any.whl → 0.0.90__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,9 +2,9 @@
  # coding=utf-8
  """
  Author: Liu Kun && 16031215@qq.com
- Date: 2024-11-01 10:31:09
+ Date: 2024-11-02 11:07:49
  LastEditors: Liu Kun && 16031215@qq.com
- LastEditTime: 2024-12-08 10:20:45
+ LastEditTime: 2025-01-07 16:31:36
  FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
  Description:
  EditPlatform: vscode
@@ -16,182 +16,188 @@ Python Version: 3.12
  import datetime
  import os
  import random
+ import re
  import time
  import warnings
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from pathlib import Path
  from threading import Lock
- import re

  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
  import requests
- from bs4 import BeautifulSoup
  from rich import print
  from rich.progress import Progress
- import glob
+
+ from oafuncs.oa_down.user_agent import get_ua
+ from oafuncs.oa_file import file_size, mean_size

  warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")

  __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list"]

- # time resolution
- data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
-
- # hourly data
- # dataset: GLBv0.08, GLBu0.08, GLBy0.08
- data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
-
- # version
- # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
- # version of GLBu0.08: 93.0
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
- # version of GLBy0.08: 93.0
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
- # version of ESPC_D: V02
- data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
-
- # info details
- # time range
- # GLBv0.08
- # Submitting an out-of-range time on the web page returns the dataset's actual time range, which was used to correct the ranges below
- # So far only the GLBv0.08 93.0 time range has been corrected, down to the hour
- # For the other datasets the hours are assumed to run from 00 to 21 for now
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
- # GLBu0.08
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
- # GLBy0.08
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
- # ESPC-D
- data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
-
- # variable
- variable_info = {
-     "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
-     "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
-     "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
-     "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
-     "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
-     "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
-     "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
-     "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
-     "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
- }
-
- # classification method
- # year_different: the data of different years is stored in different files
- # same_path: the data of different years is stored in the same file
- # var_different: the data of different variables is stored in different files
- # var_year_different: the data of different variables and years is stored in different files
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
- data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
-
- # download info
- # base url
- # GLBv0.08 53.X
- url_53x = {}
- for y_53x in range(1994, 2016):
-     # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
-     url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
- # GLBv0.08 56.3
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
- # GLBv0.08 57.2
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
- # GLBv0.08 92.8
- url_928 = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
- # GLBv0.08 57.7
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
- # GLBv0.08 92.9
- url_929 = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
- # GLBv0.08 93.0
- url_930_v = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
- # GLBu0.08 93.0
- url_930_u = {
-     "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
-     "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
-     "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
- }
- data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
- # GLBy0.08 93.0
- uv3z_930_y = {}
- ts3z_930_y = {}
- ssh_930_y = {}
- for y_930_y in range(2018, 2025):
-     uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
-     ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
-     ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
- # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
- url_930_y = {
-     "uv3z": uv3z_930_y,
-     "ts3z": ts3z_930_y,
-     "ssh": ssh_930_y,
- }
- data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
- # ESPC-D-V02
- u3z_espc_d_v02_y = {}
- v3z_espc_d_v02_y = {}
- t3z_espc_d_v02_y = {}
- s3z_espc_d_v02_y = {}
- ssh_espc_d_v02_y = {}
- for y_espc_d_v02 in range(2024, 2030):
-     u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
-     v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
-     t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
-     s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
-     ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
- url_espc_d_v02_y = {
-     "u3z": u3z_espc_d_v02_y,
-     "v3z": v3z_espc_d_v02_y,
-     "t3z": t3z_espc_d_v02_y,
-     "s3z": s3z_espc_d_v02_y,
-     "ssh": ssh_espc_d_v02_y,
- }
- data_info['hourly']['dataset']['ESPC_D']['version']['V02']['url'] = url_espc_d_v02_y
-
- var_group = {
-     "uv3z": ["u", "v", "u_b", "v_b"],
-     "ts3z": ["temp", "salt", "temp_b", "salt_b"],
-     "ssh": ["ssh"],
- }
- single_var_group = {
-     "u3z": ["u"],
-     "v3z": ["v"],
-     "t3z": ["temp"],
-     "s3z": ["salt"],
-     "ssh": ["ssh"],
- }
+
+ def get_initial_data():
+     global variable_info, data_info, var_group, single_var_group
+     # ----------------------------------------------
+     # variable
+     variable_info = {
+         "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
+         "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
+         "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
+         "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
+         "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
+         "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
+         "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
+         "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
+         "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
+     }
+     # ----------------------------------------------
+     # time resolution
+     data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
+
+     # hourly data
+     # dataset: GLBv0.08, GLBu0.08, GLBy0.08
+     data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
+
+     # version
+     # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
+     # version of GLBu0.08: 93.0
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
+     # version of GLBy0.08: 93.0
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
+     # version of ESPC_D: V02
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
+
+     # info details
+     # time range
+     # GLBv0.08
+     # Submitting an out-of-range time on the web page returns the dataset's actual time range, which was used to correct the ranges below
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
+     # GLBu0.08
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
+     # GLBy0.08
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
+     # ESPC-D
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
+
+     # classification method
+     # year_different: the data of different years is stored in different files
+     # same_path: the data of different years is stored in the same file
+     # var_different: the data of different variables is stored in different files
+     # var_year_different: the data of different variables and years is stored in different files
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
+
+     # download info
+     # base url
+     # GLBv0.08 53.X
+     url_53x = {}
+     for y_53x in range(1994, 2016):
+         # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
+         url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
+     # GLBv0.08 56.3
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
+     # GLBv0.08 57.2
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
+     # GLBv0.08 92.8
+     url_928 = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
+     # GLBv0.08 57.7
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
+     # GLBv0.08 92.9
+     url_929 = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
+     # GLBv0.08 93.0
+     url_930_v = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
+     # GLBu0.08 93.0
+     url_930_u = {
+         "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
+         "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
+         "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
+     }
+     data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
+     # GLBy0.08 93.0
+     uv3z_930_y = {}
+     ts3z_930_y = {}
+     ssh_930_y = {}
+     for y_930_y in range(2018, 2025):
+         uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
+         ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
+         ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
+     # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to year+1-01-01 09:00
+     url_930_y = {
+         "uv3z": uv3z_930_y,
+         "ts3z": ts3z_930_y,
+         "ssh": ssh_930_y,
+     }
+     data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
+     # ESPC-D-V02
+     u3z_espc_d_v02_y = {}
+     v3z_espc_d_v02_y = {}
+     t3z_espc_d_v02_y = {}
+     s3z_espc_d_v02_y = {}
+     ssh_espc_d_v02_y = {}
+     for y_espc_d_v02 in range(2024, 2030):
+         u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
+         v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
+         t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
+         s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
+         ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
+     url_espc_d_v02_y = {
+         "u3z": u3z_espc_d_v02_y,
+         "v3z": v3z_espc_d_v02_y,
+         "t3z": t3z_espc_d_v02_y,
+         "s3z": s3z_espc_d_v02_y,
+         "ssh": ssh_espc_d_v02_y,
+     }
+     data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["url"] = url_espc_d_v02_y
+     # ----------------------------------------------
+     var_group = {
+         "uv3z": ["u", "v", "u_b", "v_b"],
+         "ts3z": ["temp", "salt", "temp_b", "salt_b"],
+         "ssh": ["ssh"],
+     }
+     # ----------------------------------------------
+     single_var_group = {
+         "u3z": ["u"],
+         "v3z": ["v"],
+         "t3z": ["temp"],
+         "s3z": ["salt"],
+         "ssh": ["ssh"],
+     }
+
+     return variable_info, data_info, var_group, single_var_group


  def draw_time_range(pic_save_folder=None):
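
Note: the substantive change in this hunk is structural. The module-level metadata tables (variable_info, data_info, var_group, single_var_group) move into get_initial_data(), which publishes them as globals and also returns them, so the tables are only built once a download is actually requested. A minimal sketch of this lazy-initialization pattern, with a cut-down table for illustration (the real tables are far larger):

    def get_initial_data():
        global variable_info, data_info
        variable_info = {"u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"}}
        data_info = {"hourly": {"dataset": {"GLBv0.08": {"version": {"93.0": {}}}}}}
        return variable_info, data_info

    def some_entry_point():
        # Every public entry point calls get_initial_data() first,
        # so the globals exist before any lookup.
        get_initial_data()
        print(variable_info["u"]["var_name"])  # -> water_u

    some_entry_point()
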
@@ -556,79 +562,6 @@ def clear_existing_file(file_full_path):
      print(f"{file_full_path} has been removed")


- def find_file(parent_path, fname, mode="path"):
-     """
-     description:
-     param {*} parent_path: The parent path where the files are located
-     param {*} fname: The file name pattern to search for
-     param {*} mode: 'path' to return the full path of the files, 'file' to return only the file names
-     return {*} A list of file paths or file names if files are found, None otherwise
-     """
-
-     def natural_sort_key(s):
-         """Generate a key for natural sorting"""
-         return [int(text) if text.isdigit() else text.lower() for text in re.split("([0-9]+)", s)]
-
-     # Join parent_path and fname into the full search pattern
-     search_pattern = os.path.join(str(parent_path), fname)
-
-     # Use the glob module to find all matching files
-     matched_files = glob.glob(search_pattern)
-
-     # If no files were found, return None
-     if not matched_files:
-         return None
-
-     # Replaces the natsorted call in find_files
-     matched_files = sorted(matched_files, key=natural_sort_key)
-
-     # Decide what to return based on the mode parameter
-     if mode == "file":
-         # Return only the file names
-         result = [os.path.basename(file) for file in matched_files]
-     else:  # default is 'path'
-         # Return the absolute paths of the files
-         result = [os.path.abspath(file) for file in matched_files]
-
-     return result
-
-
- def file_size(file_path, unit="KB"):
-     # Check whether the file exists
-     if not os.path.exists(file_path):
-         return "File does not exist"
-
-     # Get the file size in bytes
-     file_size = os.path.getsize(file_path)
-
-     # Unit conversion table
-     unit_dict = {"PB": 1024**5, "TB": 1024**4, "GB": 1024**3, "MB": 1024**2, "KB": 1024}
-
-     # Check that the requested unit is valid
-     if unit not in unit_dict:
-         return "Invalid unit; choose one of PB, TB, GB, MB, KB"
-
-     # Convert the file size to the requested unit
-     converted_size = file_size / unit_dict[unit]
-
-     return converted_size
-
-
- # ** Compute the mean size of matching files under a folder
- def mean_size(parent_path, fname):
-     flist = find_file(parent_path, fname)
-     if flist:
-         size_list = [file_size(f) for f in flist if file_size(f) != 0]
-     else:
-         size_list = []
-     if size_list:
-         min_size, max_size = min(size_list), max(size_list)
-         mean_size = sum(size_list) / len(size_list)
-     else:
-         mean_size, min_size, max_size = 0, 0, 0
-     return mean_size, min_size, max_size
-
-
  def check_existing_file(file_full_path, min_size):
      if os.path.exists(file_full_path):
          print(f"[bold #FFA54F]{file_full_path} exists")
@@ -651,87 +584,28 @@ def check_existing_file(file_full_path, min_size):
      return False


- def get_ua():
-     current_dir = os.path.dirname(os.path.abspath(__file__))
-     ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
-
-     with open(ua_file_txt, "r") as f:
-         ua_list = f.readlines()
-         # Strip newlines and drop empty lines
-         ua_list = [line.strip() for line in ua_list if line.strip()]
-
-     # if current_platform == 'Linux':
-     #     ua_list = [line for line in ua_list if 'Linux' in line]
-
-     return random.choice(ua_list)
-
-
- def get_proxy_file():
-     # Get the absolute path of the current script
-     script_dir = os.path.dirname(os.path.abspath(__file__))
-     # Build the absolute path to ip.txt
-     ip_file_txt = os.path.join(script_dir, "ip.txt")
-     with open(ip_file_txt, "r") as f:
-         ips = f.readlines()
-     ip_list = []
-     for ip in ips:
-         ip_list.append(ip.strip())
-     choose_ip = random.choice(ip_list)
-     proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
-     # print(f'Using proxy: {proxies}')
-     return proxies
-
-
- def scrape_and_categorize_proxies(choose_protocol="http"):
-     url = "https://topproxylinks.com/"
-     # Send an HTTP request to fetch the page
-     response = requests.get(url)
-     # Parse the page with BeautifulSoup
-     soup = BeautifulSoup(response.text, "html.parser")
-
-     # Initialize a dict to store proxies by protocol
-     proxies_dict = {"http": [], "socks4": [], "socks5": []}
-
-     # Find all rows in the table
-     tbody = soup.find("tbody")
-
-     if tbody:
-         for row in tbody.find_all("tr"):
-             # Extract the protocol, proxy, and country cells
-             cells = row.find_all("td")
-             protocol = cells[0].text.strip().lower()
-             proxy = cells[1].text.strip()
-
-             # Store the proxy under its protocol
-             if protocol in proxies_dict:
-                 proxies_dict[protocol].append(proxy)
-
-     if choose_protocol in proxies_dict:
-         proxies_list = proxies_dict[choose_protocol]
-     else:
-         proxies_list = proxies_dict["http"]
-
-     return proxies_list
-
-
- def get_proxy():
-     ip_list = scrape_and_categorize_proxies(choose_protocol="http")
-     choose_ip = random.choice(ip_list)
-     proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
-     print(f"Using proxy: {proxies}")
-     return proxies
-
-
  def download_file(target_url, store_path, file_name, check=False):
      # Check if the file exists
      fname = Path(store_path) / file_name
      file_name_split = file_name.split("_")
-     same_file = f'{file_name_split[0]}_{file_name_split[1]}*nc'
-     fsize_mean, fsize_min, fsize_max = mean_size(store_path, same_file)
-     set_min_size = fsize_mean - 0.5 * (fsize_max - fsize_min)
-     set_min_size = set_min_size if set_min_size > 0 else fsize_min
+     file_name_split = file_name_split[:-1]
+     # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
+     same_file = "_".join(file_name_split) + "*nc"
+
+     if same_file not in fsize_dict.keys():
+         # print(f'Same file name: {same_file}')
+         fsize_dict[same_file] = {"size": 0, "count": 0}
+
+     if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
+         # Re-estimate the minimum size for the first 30 files; afterwards it is taken as representative of all files and no longer updated, to save time
+         fsize_mean = mean_size(store_path, same_file, max_num=30)
+         set_min_size = fsize_mean * 0.8
+         fsize_dict[same_file]["size"] = set_min_size
+         fsize_dict[same_file]["count"] += 1
+     else:
+         set_min_size = fsize_dict[same_file]["size"]
      if check:
-         if check_existing_file(fname, set_min_size*0.8):
+         if check_existing_file(fname, set_min_size):
              count_dict["skip"] += 1
              return
      clear_existing_file(fname)
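
Note: the rewritten block replaces a per-download directory scan (the old mean/min/max heuristic) with a per-pattern cache. The expected minimum size for a file pattern is re-estimated via mean_size() for the first 30 downloads of that pattern, after which the cached value is reused. A self-contained sketch of the idea; mean_size is stubbed here, while in the package it is imported from oafuncs.oa_file:

    fsize_dict = {}  # pattern -> {"size": cached minimum size, "count": times estimated}

    def mean_size(store_path, pattern, max_num=30):
        return 1000.0  # stub: mean size of up to max_num files matching pattern

    def expected_min_size(store_path, pattern):
        entry = fsize_dict.setdefault(pattern, {"size": 0, "count": 0})
        if entry["count"] < 30 or entry["size"] == 0:
            # Re-estimate for the first 30 files of a pattern; afterwards the
            # cached value is treated as representative and never recomputed.
            entry["size"] = mean_size(store_path, pattern, max_num=30) * 0.8
            entry["count"] += 1
        return entry["size"]

    print(expected_min_size(".", "HYCOM_water_u*nc"))  # -> 800.0
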
@@ -744,13 +618,11 @@ def download_file(target_url, store_path, file_name, check=False):
      request_times = 0

      def calculate_wait_time(time_str, target_url):
-         import re
-
          # Regular expression matching times in YYYYMMDDHH format
          time_pattern = r"\d{10}"

          # Two example strings
-         # str1 = 'HYCOM_water_u_2018010100_2018010112.nc'
+         # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
          # str2 = 'HYCOM_water_u_2018010100.nc'

          # Find the times with the regular expression
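
Note: calculate_wait_time pulls the timestamps out of a file name with the ten-digit pattern; on the two example strings from the comments it behaves like this:

    import re

    time_pattern = r"\d{10}"
    str1 = "HYCOM_water_u_2018010100-2018010112.nc"
    str2 = "HYCOM_water_u_2018010100.nc"
    print(re.findall(time_pattern, str1))  # ['2018010100', '2018010112']
    print(re.findall(time_pattern, str2))  # ['2018010100']
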
@@ -796,13 +668,8 @@ def download_file(target_url, store_path, file_name, check=False):
          with open(filename, 'wb') as f:
              f.write(response.content) """

-         if find_proxy:
-             proxies = get_proxy()
-             response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
-         else:
-             response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
+         response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
          response.raise_for_status()  # raises HTTPError if the request does not return 200
-
          # Save the file
          with open(fname, "wb") as f:
              print(f"[bold #96cbd7]Downloading {file_name}...")
@@ -923,7 +790,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
          submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
          file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
          if download_time_end is not None:
-             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}_{download_time_end}.nc"
+             file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"  # the two times must not be joined with an underscore, or later lookups for files of the same variable will break
          download_file(submit_url, store_path, file_name, check)
      else:
          if download_time < "2024081012":
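
Note: the inline comment added here (translated above) is the reason for switching the separator between the two timestamps from an underscore to a hyphen. download_file builds its "same file" glob by splitting the name on underscores and dropping the last field, so an underscore between the timestamps would leak the start time into the pattern. A quick demonstration:

    def same_file_pattern(file_name):
        parts = file_name.split("_")[:-1]  # drop the last underscore-separated field
        return "_".join(parts) + "*nc"

    # Hyphen separator: the whole time span is one field, so the pattern
    # reduces to the variable name, as intended.
    print(same_file_pattern("HYCOM_water_u_2018010100-2018010112.nc"))  # HYCOM_water_u*nc

    # Underscore separator: only the end time is dropped and the start time
    # leaks into the pattern, so sibling files of the same variable no longer match.
    print(same_file_pattern("HYCOM_water_u_2018010100_2018010112.nc"))  # HYCOM_water_u_2018010100*nc
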
@@ -948,14 +815,14 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
              # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
              file_name = f"HYCOM_{key}_{download_time}.nc"
              if download_time_end is not None:
-                 file_name = f"HYCOM_{key}_{download_time}_{download_time_end}.nc"
+                 file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"  # the two times must not be joined with an underscore, or later lookups for files of the same variable will break
              download_file(submit_url, store_path, file_name, check)
          else:
              for v in var:
                  submit_url = get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                  file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                  if download_time_end is not None:
-                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}_{download_time_end}.nc"
+                     file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
                  download_file(submit_url, store_path, file_name, check)


@@ -1073,28 +940,30 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
  def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
      """
      Description:
-     Download the data of single time or a series of time
+         Download the data of single time or a series of time

      Parameters:
-     var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
-     time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
-     time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
-     lon_min: float, the minimum longitude, default is 0
-     lon_max: float, the maximum longitude, default is 359.92
-     lat_min: float, the minimum latitude, default is -80
-     lat_max: float, the maximum latitude, default is 90
-     depth: float, the depth, default is None, if you wanna get the data of single depth, you can set the depth, suggest to set the depth in [0, 5000]
-     level: int, the level number, default is None, if you wanna get the data of single level, you can set the level, suggest to set the level in [1, 40]
-     store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
-     dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
-     version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
-     num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
-     check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
-     ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file
+         var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
+         time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
+         time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21; default is None, if not set, the data of single time will be downloaded; or same as time_s, the data of single time will be downloaded
+         lon_min: float, the minimum longitude, default is 0
+         lon_max: float, the maximum longitude, default is 359.92
+         lat_min: float, the minimum latitude, default is -80
+         lat_max: float, the maximum latitude, default is 90
+         depth: float, the depth, default is None, if you wanna get the data of single depth, you can set the depth, suggest to set the depth in [0, 5000]
+         level: int, the level number, default is None, if you wanna get the data of single level, you can set the level, suggest to set the level in [1, 40]
+         store_path: str, the path to store the data, default is None, if not set, the data will be stored in the current working directory
+         dataset_name: str, the dataset name, default is None, example: 'GLBv0.08', 'GLBu0.08', 'GLBy0.08', if not set, the dataset will be chosen according to the download_time
+         version_name: str, the version name, default is None, example: '53.X', '56.3', if not set, the version will be chosen according to the download_time
+         num_workers: int, the number of workers, default is None, if not set, the number of workers will be 1; suggest not to set the number of workers too large
+         check: bool, whether to check the existing file, default is False, if set to True, the existing file will be checked and not downloaded again; else, the existing file will be covered
+         ftimes: int, the number of time in one file, default is 1, if set to 1, the data of single time will be downloaded; the maximum is 8, if set to 8, the data of 8 times will be downloaded in one file

      Returns:
-     None
+         None
      """
+     get_initial_data()
+
      # Print the info and resolve the dataset and version names
      if dataset_name is None and version_name is None:
          print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
@@ -1154,8 +1023,8 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
      """ global current_platform
      current_platform = platform.system() """

-     global find_proxy
-     find_proxy = False
+     global fsize_dict
+     fsize_dict = {}

      download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)

@@ -1225,8 +1094,7 @@ def how_to_use():


  if __name__ == "__main__":
-     # help(hycom3h.download)
-     time_s, time_e = "2024081012", "2024081115"
+     time_s, time_e = "2024101012", "2024101018"
      merge_name = f"{time_s}_{time_e}"  # name of the merged file
      root_path = r"G:\Data\HYCOM\3hourly_test"
      location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
@@ -1256,7 +1124,7 @@ if __name__ == "__main__":
1256
1124
  download_switch, single_var = True, False
1257
1125
  combine_switch = False
1258
1126
  copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
1259
-
1127
+
1260
1128
  # draw_time_range(pic_save_folder=r'I:\Delete')
1261
1129
 
1262
1130
  if download_switch:
@@ -1288,6 +1156,6 @@ if __name__ == "__main__":
1288
1156
  merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
1289
1157
  if combine_switch:
1290
1158
  # 这里的var_name必须是官方变量名,不能再是简写了
1291
- merge5nc(file_list, var_name, 'time', merge_path_name)
1159
+ merge(file_list, var_name, 'time', merge_path_name)
1292
1160
  if copy_switch:
1293
1161
  copy_file(merge_path_name, copy_dir) """