oafuncs 0.0.60__py2.py3-none-any.whl → 0.0.61__py2.py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- oafuncs/oa_cmap.py +8 -14
- oafuncs/oa_data.py +23 -41
- oafuncs/oa_down/hycom_3hourly.py +47 -89
- oafuncs/oa_down/refs_pdf.py +14 -25
- oafuncs/oa_draw.py +23 -47
- oafuncs/oa_file.py +34 -35
- oafuncs/oa_nc.py +18 -34
- oafuncs/oa_sign/meteorological.py +7 -14
- oafuncs/oa_sign/ocean.py +7 -12
- {oafuncs-0.0.60.dist-info → oafuncs-0.0.61.dist-info}/METADATA +1 -1
- oafuncs-0.0.61.dist-info/RECORD +22 -0
- oafuncs-0.0.60.dist-info/RECORD +0 -22
- {oafuncs-0.0.60.dist-info → oafuncs-0.0.61.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.60.dist-info → oafuncs-0.0.61.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.60.dist-info → oafuncs-0.0.61.dist-info}/top_level.txt +0 -0
oafuncs/oa_cmap.py
CHANGED
```diff
@@ -4,8 +4,8 @@
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-09-17 16:55:11
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-
-FilePath: \\Python\\My_Funcs\\OAFuncs\\
+LastEditTime: 2024-11-21 13:14:24
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_cmap.py
 Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
@@ -18,8 +18,7 @@ import matplotlib as mpl
 import matplotlib.pyplot as plt
 import numpy as np
 
-__all__ = ['show', 'extract_colors', 'create_custom',
-           'create_diverging', 'create_5rgb_txt']
+__all__ = ['show', 'extract_colors', 'create_custom', 'create_diverging', 'create_5rgb_txt']
 
 # ** Visualize cmaps as filled plots (function adapted from the matplotlib docs)
 
@@ -34,8 +33,7 @@ def show(colormaps: list):
     np.random.seed(19680801)
     data = np.random.randn(30, 30)
     n = len(colormaps)
-    fig, axs = plt.subplots(1, n, figsize=(n * 2 + 2, 3),
-                            constrained_layout=True, squeeze=False)
+    fig, axs = plt.subplots(1, n, figsize=(n * 2 + 2, 3), constrained_layout=True, squeeze=False)
     for [ax, cmap] in zip(axs.flat, colormaps):
         psm = ax.pcolormesh(data, cmap=cmap, rasterized=True, vmin=-4, vmax=4)
         fig.colorbar(psm, ax=ax)
@@ -67,11 +65,9 @@ def create_custom(colors: list, nodes=None):  # quick colormap from a color list
     c_map = mk_cmap(['aliceblue','skyblue','deepskyblue'],[0.0,0.5,1.0])
     '''
     if nodes is None:  # distribute the colors evenly
-        cmap_color = mpl.colors.LinearSegmentedColormap.from_list(
-            'mycmap', colors)
+        cmap_color = mpl.colors.LinearSegmentedColormap.from_list('mycmap', colors)
     else:  # distribute the colors at the given nodes
-        cmap_color = mpl.colors.LinearSegmentedColormap.from_list(
-            "mycmap", list(zip(nodes, colors)))
+        cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", list(zip(nodes, colors)))
     return cmap_color
 
 # ** Custom diverging cmap, white in the middle by default
@@ -91,8 +87,7 @@ def create_diverging(colors: list):
     newcolors = colors
     if n % 2 == 0:
         newcolors.insert(int(n / 2), '#ffffff')  # even number of colors: insert white in the middle
-    cmap_color = mpl.colors.LinearSegmentedColormap.from_list(
-        "mycmap", list(zip(nodes, newcolors)))
+    cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", list(zip(nodes, newcolors)))
     return cmap_color
 
 # ** Build a colormap from an RGB txt file (using a GrADS palette)
@@ -129,8 +124,7 @@ if __name__ == '__main__':
     show([c_map])
 
     # ** Test the custom diverging cmap
-    diverging_cmap = create_diverging(["#4e00b3", "#0000FF", "#00c0ff",
-                                       "#a1d3ff", "#DCDCDC", "#FFD39B", "#FF8247", "#FF0000", "#FF5F9E"])
+    diverging_cmap = create_diverging(["#4e00b3", "#0000FF", "#00c0ff", "#a1d3ff", "#DCDCDC", "#FFD39B", "#FF8247", "#FF0000", "#FF5F9E"])
     show([diverging_cmap])
 
     # ** Test building a colormap from an RGB txt file
```
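The hunks above are pure reflow, so behavior is unchanged. As orientation, here is a minimal sketch of what the two `LinearSegmentedColormap.from_list` branches in `create_custom` do; it assumes nothing beyond matplotlib itself, and the variable names are illustrative:

```python
# Sketch of the two from_list call forms joined in the diff; only matplotlib is assumed.
import matplotlib as mpl

colors = ['aliceblue', 'skyblue', 'deepskyblue']
nodes = [0.0, 0.5, 1.0]

# nodes is None: the colors are spread evenly over [0, 1]
even = mpl.colors.LinearSegmentedColormap.from_list('mycmap', colors)
# nodes given: each color is pinned to its fraction of [0, 1]
pinned = mpl.colors.LinearSegmentedColormap.from_list('mycmap', list(zip(nodes, colors)))

print(even(0.5), pinned(0.5))  # calling a colormap with a float returns an RGBA tuple
```

Passing `list(zip(nodes, colors))` is why the `nodes is None` branch can simply omit the positions: matplotlib accepts either a plain color list or a list of `(position, color)` pairs.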
oafuncs/oa_data.py
CHANGED
```diff
@@ -4,8 +4,8 @@
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-09-17 17:12:47
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-
-FilePath: \\Python\\My_Funcs\\OAFuncs\\
+LastEditTime: 2024-11-21 13:13:20
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_data.py
 Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
@@ -14,7 +14,6 @@ Python Version: 3.11
 '''
 
 
-
 import multiprocessing as mp
 from concurrent.futures import ThreadPoolExecutor
 
@@ -24,6 +23,8 @@ from scipy.interpolate import griddata
 __all__ = ['interp_2d', 'interp_2d_parallel']
 
 # ** High-dimensional interpolation over the last two dimensions
+
+
 def interp_2d(target_x, target_y, origin_x, origin_y, data, method='linear'):
     """
     High-dimensional interpolation; the last two dimensions are interpolated by default. Make sure the dimensions are correct before passing data in.
@@ -51,8 +52,7 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method='linear'):
 
     if origin_x.shape != dims[-2:] or origin_y.shape != dims[-2:]:
         print(origin_x.shape, dims[-2:])
-        raise ValueError(
-            'Shape of data does not match shape of origin_x or origin_y.')
+        raise ValueError('Shape of data does not match shape of origin_x or origin_y.')
 
     # flatten the target grid into 1-D point arrays
     target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
@@ -62,14 +62,12 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method='linear'):
 
     # do the interpolation
     if len_dims == 2:
-        interpolated_data = griddata(origin_points, np.ravel(
-            data), target_points, method=method)
+        interpolated_data = griddata(origin_points, np.ravel(data), target_points, method=method)
         interpolated_data = np.reshape(interpolated_data, target_y.shape)
     elif len_dims == 3:
         interpolated_data = []
         for i in range(dims[0]):
-            dt = griddata(origin_points, np.ravel(
-                data[i, :, :]), target_points, method=method)
+            dt = griddata(origin_points, np.ravel(data[i, :, :]), target_points, method=method)
             interpolated_data.append(np.reshape(dt, target_y.shape))
             print(f'Interpolating {i+1}/{dims[0]}...')
         interpolated_data = np.array(interpolated_data)
@@ -78,11 +76,9 @@ def interp_2d(target_x, target_y, origin_x, origin_y, data, method='linear'):
         for i in range(dims[0]):
             interpolated_data.append([])
             for j in range(dims[1]):
-                dt = griddata(origin_points, np.ravel(
-                    data[i, j, :, :]), target_points, method=method)
+                dt = griddata(origin_points, np.ravel(data[i, j, :, :]), target_points, method=method)
                 interpolated_data[i].append(np.reshape(dt, target_y.shape))
-                print(
-                    f'\rInterpolating {i*dims[1]+j+1}/{dims[0]*dims[1]}...', end='')
+                print(f'\rInterpolating {i*dims[1]+j+1}/{dims[0]*dims[1]}...', end='')
         print('\n')
         interpolated_data = np.array(interpolated_data)
 
@@ -104,33 +100,24 @@ def interp_2d_parallel(target_x, target_y, origin_x, origin_y, data, method='lin
     example : interpolated_data = interp_2d_parallel(target_x, target_y, origin_x, origin_y, data, method='linear')
     '''
     def interp_single2d(target_y, target_x, origin_y, origin_x, data, method='linear'):
-        target_points = np.column_stack(
-            (np.ravel(target_y), np.ravel(target_x)))
-        origin_points = np.column_stack(
-            (np.ravel(origin_y), np.ravel(origin_x)))
+        target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
+        origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
 
-        dt = griddata(origin_points, np.ravel(
-            data[:, :]), target_points, method=method)
+        dt = griddata(origin_points, np.ravel(data[:, :]), target_points, method=method)
         return np.reshape(dt, target_y.shape)
 
     def interp_single3d(i, target_y, target_x, origin_y, origin_x, data, method='linear'):
-        target_points = np.column_stack(
-            (np.ravel(target_y), np.ravel(target_x)))
-        origin_points = np.column_stack(
-            (np.ravel(origin_y), np.ravel(origin_x)))
+        target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
+        origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
 
-        dt = griddata(origin_points, np.ravel(
-            data[i, :, :]), target_points, method=method)
+        dt = griddata(origin_points, np.ravel(data[i, :, :]), target_points, method=method)
         return np.reshape(dt, target_y.shape)
 
     def interp_single4d(i, j, target_y, target_x, origin_y, origin_x, data, method='linear'):
-        target_points = np.column_stack(
-            (np.ravel(target_y), np.ravel(target_x)))
-        origin_points = np.column_stack(
-            (np.ravel(origin_y), np.ravel(origin_x)))
+        target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
+        origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
 
-        dt = griddata(origin_points, np.ravel(
-            data[i, j, :, :]), target_points, method=method)
+        dt = griddata(origin_points, np.ravel(data[i, j, :, :]), target_points, method=method)
         return np.reshape(dt, target_y.shape)
 
     if len(target_y.shape) == 1:
@@ -150,16 +137,12 @@ def interp_2d_parallel(target_x, target_y, origin_x, origin_y, data, method='lin
     with ThreadPoolExecutor(max_workers=mp.cpu_count()-2) as executor:
         print(f'Using {mp.cpu_count()-2} threads...')
         if len_dims == 2:
-            interpolated_data = list(executor.map(interp_single2d, [target_y], [
-                                     target_x], [origin_y], [origin_x], [data], [method]))
+            interpolated_data = list(executor.map(interp_single2d, [target_y], [target_x], [origin_y], [origin_x], [data], [method]))
         elif len_dims == 3:
-            interpolated_data = list(executor.map(interp_single3d, [i for i in range(dims[0])], [
-                                     target_y]*dims[0], [target_x]*dims[0], [origin_y]*dims[0], [origin_x]*dims[0], [data]*dims[0], [method]*dims[0]))
+            interpolated_data = list(executor.map(interp_single3d, [i for i in range(dims[0])], [target_y]*dims[0], [target_x]*dims[0], [origin_y]*dims[0], [origin_x]*dims[0], [data]*dims[0], [method]*dims[0]))
         elif len_dims == 4:
-            interpolated_data = list(executor.map(interp_single4d, [i for i in range(dims[0]) for j in range(dims[1])], [j for i in range(dims[0]) for j in range(dims[1])], [
-                                     target_y]*dims[0]*dims[1], [target_x]*dims[0]*dims[1], [origin_y]*dims[0]*dims[1], [origin_x]*dims[0]*dims[1], [data]*dims[0]*dims[1], [method]*dims[0]*dims[1]))
-            interpolated_data = np.array(interpolated_data).reshape(
-                dims[0], dims[1], target_y.shape[0], target_x.shape[1])
+            interpolated_data = list(executor.map(interp_single4d, [i for i in range(dims[0]) for j in range(dims[1])], [j for i in range(dims[0]) for j in range(dims[1])], [target_y]*dims[0]*dims[1], [target_x]*dims[0]*dims[1], [origin_y]*dims[0]*dims[1], [origin_x]*dims[0]*dims[1], [data]*dims[0]*dims[1], [method]*dims[0]*dims[1]))
+            interpolated_data = np.array(interpolated_data).reshape(dims[0], dims[1], target_y.shape[0], target_x.shape[1])
 
     interpolated_data = np.array(interpolated_data)
 
@@ -193,8 +176,7 @@ if __name__ == '__main__':
 
     # multi-threaded high-dimensional interpolation
     start = time.time()
-    interpolated_data = interp_2d_parallel(
-        target_x, target_y, origin_x, origin_y, data)
+    interpolated_data = interp_2d_parallel(target_x, target_y, origin_x, origin_y, data)
     print(f'Interpolation time: {time.time()-start:.2f}s')
 
     print(interpolated_data.shape)
```
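Again the diff only rejoins wrapped calls. For readers unfamiliar with the pattern, here is a hedged, self-contained sketch of the flatten-interpolate-reshape cycle that `interp_2d` runs for each 2-D slice; the grids and the field below are made up for illustration, and only `numpy` and `scipy` are assumed:

```python
# Sketch of the per-slice griddata pattern shown in the diff; data is illustrative.
import numpy as np
from scipy.interpolate import griddata

# Source grid (11 x 11) carrying a synthetic 2-D field
origin_x, origin_y = np.meshgrid(np.linspace(0, 10, 11), np.linspace(0, 10, 11))
data = np.sin(origin_x) + np.cos(origin_y)
# Finer target grid (21 x 21)
target_x, target_y = np.meshgrid(np.linspace(0, 10, 21), np.linspace(0, 10, 21))

# Mirror of the joined one-liners: flatten both grids, interpolate, reshape back
origin_points = np.column_stack((np.ravel(origin_y), np.ravel(origin_x)))
target_points = np.column_stack((np.ravel(target_y), np.ravel(target_x)))
out = griddata(origin_points, np.ravel(data), target_points, method='linear')
out = np.reshape(out, target_y.shape)
print(out.shape)  # (21, 21)
```

The 3-D and 4-D branches simply repeat this cycle per leading index, which is also why `interp_2d_parallel` can fan the slices out over a `ThreadPoolExecutor` without the workers sharing state.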
oafuncs/oa_down/hycom_3hourly.py
CHANGED
```diff
@@ -4,7 +4,7 @@
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-11-01 10:31:09
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-11-21
+LastEditTime: 2024-11-21 13:24:49
 FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
 Description:
 EditPlatform: vscode
@@ -27,8 +27,7 @@ import requests
 from rich import print
 from rich.progress import Progress
 
-warnings.filterwarnings("ignore", category=RuntimeWarning,
-                        message="Engine '.*' loading failed:.*")
+warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
 
 __all__ = ['draw_time_range', 'download', 'how_to_use', 'get_time_list']
 
@@ -37,13 +36,11 @@ data_info = {'yearly': {}, 'monthly': {}, 'daily': {}, 'hourly': {}}
 
 # hourly data
 # dataset: GLBv0.08, GLBu0.08, GLBy0.08
-data_info['hourly']['dataset'] = {
-    'GLBv0.08': {}, 'GLBu0.08': {}, 'GLBy0.08': {}}
+data_info['hourly']['dataset'] = {'GLBv0.08': {}, 'GLBu0.08': {}, 'GLBy0.08': {}}
 
 # version
 # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
-data_info['hourly']['dataset']['GLBv0.08']['version'] = {
-    '53.X': {}, '56.3': {}, '57.2': {}, '92.8': {}, '57.7': {}, '92.9': {}, '93.0': {}}
+data_info['hourly']['dataset']['GLBv0.08']['version'] = {'53.X': {}, '56.3': {}, '57.2': {}, '92.8': {}, '57.7': {}, '92.9': {}, '93.0': {}}
 # version of GLBu0.08: 93.0
 data_info['hourly']['dataset']['GLBu0.08']['version'] = {'93.0': {}}
 # version of GLBy0.08: 93.0
@@ -55,26 +52,17 @@ data_info['hourly']['dataset']['GLBy0.08']['version'] = {'93.0': {}}
 # Submitting an out-of-range time on the website returns the dataset's actual time range, which was used to correct the ranges below
 # So far only the GLBv0.08 93.0 time range has been corrected, down to the hour
 # The other datasets are assumed to start at hour 00 and end at hour 21 for now
-data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['time_range'] = {
-    'time_start': '19940101', 'time_end': '20151230'}
-data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['time_range'] = {
-    'time_start': '20140701', 'time_end': '20160430'}
-data_info['hourly']['dataset']['GLBv0.08']['version']['57.2']['time_range'] = {
-    'time_start': '20160501', 'time_end': '20170131'}
-data_info['hourly']['dataset']['GLBv0.08']['version']['92.8']['time_range'] = {
-    'time_start': '20170201', 'time_end': '20170531'}
-data_info['hourly']['dataset']['GLBv0.08']['version']['57.7']['time_range'] = {
-    'time_start': '20170601', 'time_end': '20170930'}
-data_info['hourly']['dataset']['GLBv0.08']['version']['92.9']['time_range'] = {
-    'time_start': '20171001', 'time_end': '20171231'}
-data_info['hourly']['dataset']['GLBv0.08']['version']['93.0']['time_range'] = {
-    'time_start': '2018010112', 'time_end': '2020021909'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['time_range'] = {'time_start': '19940101', 'time_end': '20151230'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['time_range'] = {'time_start': '20140701', 'time_end': '20160430'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['57.2']['time_range'] = {'time_start': '20160501', 'time_end': '20170131'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['92.8']['time_range'] = {'time_start': '20170201', 'time_end': '20170531'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['57.7']['time_range'] = {'time_start': '20170601', 'time_end': '20170930'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['92.9']['time_range'] = {'time_start': '20171001', 'time_end': '20171231'}
+data_info['hourly']['dataset']['GLBv0.08']['version']['93.0']['time_range'] = {'time_start': '2018010112', 'time_end': '2020021909'}
 # GLBu0.08
-data_info['hourly']['dataset']['GLBu0.08']['version']['93.0']['time_range'] = {
-    'time_start': '20180919', 'time_end': '20181208'}
+data_info['hourly']['dataset']['GLBu0.08']['version']['93.0']['time_range'] = {'time_start': '20180919', 'time_end': '20181208'}
 # GLBy0.08
-data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['time_range'] = {
-    'time_start': '20181204', 'time_end': '20300904'}
+data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['time_range'] = {'time_start': '20181204', 'time_end': '20300904'}
 
 # variable
 variable_info = {
@@ -110,8 +98,7 @@ data_info['hourly']['dataset']['GLBy0.08']['version']['93.0']['classification']
 url_53x = {}
 for y_53x in range(1994, 2016):
     # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
-    url_53x[str(
-        y_53x)] = rf'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?'
+    url_53x[str(y_53x)] = rf'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?'
 data_info['hourly']['dataset']['GLBv0.08']['version']['53.X']['url'] = url_53x
 # GLBv0.08 56.3
 data_info['hourly']['dataset']['GLBv0.08']['version']['56.3']['url'] = r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?'
@@ -184,7 +171,7 @@ def draw_time_range(pic_save_folder=None):
             'dataset': dataset,
             'version': version,
             'start_date': pd.to_datetime(t_s),
-            'end_date': pd.to_datetime(t_e)
+            'end_date': pd.to_datetime(t_e),
         })
 
     # Creating a DataFrame
@@ -194,34 +181,27 @@ def draw_time_range(pic_save_folder=None):
     plt.figure(figsize=(12, 6))
 
     # Combined labels for datasets and versions
-    combined_labels = [f"{dataset}_{version}" for dataset,
-                       version in zip(df['dataset'], df['version'])]
+    combined_labels = [f"{dataset}_{version}" for dataset, version in zip(df['dataset'], df['version'])]
 
     colors = plt.cm.viridis(np.linspace(0, 1, len(combined_labels)))
 
     # Assigning a color to each combined label
-    label_colors = {label: colors[i]
-                    for i, label in enumerate(combined_labels)}
+    label_colors = {label: colors[i] for i, label in enumerate(combined_labels)}
 
     # Plotting each time range
     k = 1
     for _, row in df.iterrows():
-        plt.plot([row['start_date'], row['end_date']], [k, k],
-                 color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
+        plt.plot([row['start_date'], row['end_date']], [k, k], color=label_colors[f"{row['dataset']}_{row['version']}"], linewidth=6)
         # plt.text(row['end_date'], k,
         #          f"{row['version']}", ha='right', color='black')
         ymdh_s = row['start_date'].strftime('%Y-%m-%d %H')
         ymdh_e = row['end_date'].strftime('%Y-%m-%d %H')
         if k == 1 or k == len(combined_labels):
-            plt.text(row['start_date'], k+0.125,
-                     f"{ymdh_s}", ha='left', color='black')
-            plt.text(row['end_date'], k+0.125,
-                     f"{ymdh_e}", ha='right', color='black')
+            plt.text(row['start_date'], k+0.125, f"{ymdh_s}", ha='left', color='black')
+            plt.text(row['end_date'], k+0.125, f"{ymdh_e}", ha='right', color='black')
         else:
-            plt.text(row['start_date'], k+0.125,
-                     f"{ymdh_s}", ha='right', color='black')
-            plt.text(row['end_date'], k+0.125,
-                     f"{ymdh_e}", ha='left', color='black')
+            plt.text(row['start_date'], k+0.125, f"{ymdh_s}", ha='right', color='black')
+            plt.text(row['end_date'], k+0.125, f"{ymdh_e}", ha='left', color='black')
         k += 1
 
     # Setting the y-axis labels
@@ -283,7 +263,7 @@ def set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str
         'horizStride': 1,
         'time': transform_time(time_str_ymdh),
         'addLatLon': 'true',
-        'accept': 'netcdf4'
+        'accept': 'netcdf4',
     }
     return query_dict
 
@@ -299,7 +279,7 @@ def set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_
         'time': transform_time(time_str_ymdh),
         'vertCoord': 0,
         'addLatLon': 'true',
-        'accept': 'netcdf4'
+        'accept': 'netcdf4',
     }
     return query_dict
 
@@ -315,7 +295,7 @@ def set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
         'time': transform_time(time_str_ymdh),
         'vertStride': 1,
         'addLatLon': 'true',
-        'accept': 'netcdf4'
+        'accept': 'netcdf4',
     }
     return query_dict
 
@@ -351,21 +331,17 @@ def get_query_dict_single_level(var, lon_min, lon_max, lat_min, lat_max, level_n
 
 def get_query_dict_full_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
     if var in ['ssh']:
-        query_dict = set_query_dict_no_vertical(
-            var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
+        query_dict = set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
     else:
-        query_dict = set_query_dict_full(
-            var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
+        query_dict = set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
     return query_dict
 
 
 def get_query_dict_full_depth(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh):
     if var in ['ssh']:
-        query_dict = set_query_dict_no_vertical(
-            var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
+        query_dict = set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
     else:
-        query_dict = set_query_dict_full(
-            var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
+        query_dict = set_query_dict_full(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
     return query_dict
 
 
@@ -377,8 +353,7 @@ def ymd_in_which_dataset_and_version(time_ymdh):
     have_data = False
     for dataset_name in data_info['hourly']['dataset'].keys():
         for version_name in data_info['hourly']['dataset'][dataset_name]['version'].keys():
-            time_s, time_e = list(
-                data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
+            time_s, time_e = list(data_info['hourly']['dataset'][dataset_name]['version'][version_name]['time_range'].values())
             time_s, time_e = str(time_s), str(time_e)
             if len(time_s) == 8:
                 time_s = time_s + '00'
@@ -394,8 +369,7 @@ def ymd_in_which_dataset_and_version(time_ymdh):
         for d, v, trange in zip(d_list, v_list, trange_list):
             print(f'[bold blue]{d} {v} {trange}')
     else:
-        raise ValueError(
-            f'[bold red]{time_ymdh} is not in any dataset and version')
+        raise ValueError(f'[bold red]{time_ymdh} is not in any dataset and version')
 
 
 def direct_choose_dataset_and_version(time_ymdh):
@@ -428,8 +402,7 @@ def get_base_url(dataset_name, version_name, var, year_str):
         elif var in ['ssh']:
             base_url = url_dict['ssh']
         else:
-            print(
-                'Please ensure the var is in [u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b]')
+            print('Please ensure the var is in [u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b]')
     elif classification_method == 'var_year_different':
         if var in ['u', 'v', 'u_b', 'v_b']:
             base_url = url_dict['uv3z'][str(year_str)]
@@ -438,16 +411,14 @@ def get_base_url(dataset_name, version_name, var, year_str):
         elif var in ['ssh']:
             base_url = url_dict['ssh'][str(year_str)]
         else:
-            print(
-                'Please ensure the var is in [u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b]')
+            print('Please ensure the var is in [u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b]')
     return base_url
 
 
 def get_submit_url(dataset_name, version_name, var, year_str, query_dict):
     base_url = get_base_url(dataset_name, version_name, var, year_str)
     query_dict['var'] = [query_dict['var']]
-    target_url = base_url + '&'.join(f"var={var}" for var in query_dict['var']) + '&' + '&'.join(
-        f"{key}={value}" for key, value in query_dict.items() if key != 'var')
+    target_url = base_url + '&'.join(f"var={var}" for var in query_dict['var']) + '&' + '&'.join(f"{key}={value}" for key, value in query_dict.items() if key != 'var')
     return target_url
 
 
@@ -576,8 +547,7 @@ def dlownload_file(target_url, store_path, file_name, check=False):
     try:
         headers = {
             'User-Agent': get_ua()}
-        response = s.get(target_url, headers=headers,
-                         timeout=5)
+        response = s.get(target_url, headers=headers, timeout=5)
         response.raise_for_status()  # raise an HTTPError if the request does not return 200
 
         # save the file
@@ -620,20 +590,17 @@ def direct_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, dow
         print('The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.')
         print('If you wanna choose the dataset and version by yourself, please set the dataset_name and version_name together.')
         ymd_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version(
-            download_time)
+        dataset_name, version_name = direct_choose_dataset_and_version(download_time)
     elif dataset_name is None and version_name is not None:
         print('Please ensure the dataset_name is not None')
         print('If you do not add the dataset_name, both the dataset and version will be chosen according to the download_time.')
         ymd_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version(
-            download_time)
+        dataset_name, version_name = direct_choose_dataset_and_version(download_time)
    elif dataset_name is not None and version_name is None:
         print('Please ensure the version_name is not None')
         print('If you do not add the version_name, both the dataset and version will be chosen according to the download_time.')
         ymd_in_which_dataset_and_version(download_time)
-        dataset_name, version_name = direct_choose_dataset_and_version(
-            download_time)
+        dataset_name, version_name = direct_choose_dataset_and_version(download_time)
     else:
         print('The dataset_name and version_name are both set by yourself.')
 
@@ -642,16 +609,13 @@ def direct_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, dow
         print('Please ensure the depth or level_num is None')
     elif depth is not None:
         print(f'Data of single depth ({depth}m) will be downloaded...')
-        query_dict = get_query_dict_single_depth(
-            var, lon_min, lon_max, lat_min, lat_max, depth, download_time)
+        query_dict = get_query_dict_single_depth(var, lon_min, lon_max, lat_min, lat_max, depth, download_time)
     elif level_num is not None:
         print(f'Data of single level ({level_num}) will be downloaded...')
-        query_dict = get_query_dict_single_level(
-            var, lon_min, lon_max, lat_min, lat_max, level_num, download_time)
+        query_dict = get_query_dict_single_level(var, lon_min, lon_max, lat_min, lat_max, level_num, download_time)
     else:
         print('Full depth or full level data will be downloaded...')
-        query_dict = get_query_dict_full_level(
-            var, lon_min, lon_max, lat_min, lat_max, download_time)
+        query_dict = get_query_dict_full_level(var, lon_min, lon_max, lat_min, lat_max, download_time)
     submit_url = get_submit_url(
         dataset_name, version_name, var, year_str, query_dict)
     file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
@@ -682,8 +646,7 @@ def download_task(var, time_str, lon_min, lon_max, lat_min, lat_max, depth, leve
    This way every task is independent, with its own parameters and data, and never shares or modifies another task's data.
    So even if several tasks run at the same time, their data cannot get mixed up.
    '''
-    direct_download(var, lon_min, lon_max, lat_min, lat_max, time_str,
-                    depth, level, store_path, dataset_name, version_name, check)
+    direct_download(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
 
 
 def download(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False):
@@ -724,30 +687,25 @@ def download(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
     if len(ymdh_time_e) == 8:
         ymdh_time_e += '21'
     if ymdh_time_s == ymdh_time_e:
-        direct_download(var, lon_min, lon_max, lat_min, lat_max,
-                        ymdh_time_s, depth, level, store_path, dataset_name, version_name)
+        direct_download(var, lon_min, lon_max, lat_min, lat_max, ymdh_time_s, depth, level, store_path, dataset_name, version_name)
     elif int(ymdh_time_s) < int(ymdh_time_e):
         print('Downloading a series of files...')
         time_list = get_time_list(ymdh_time_s, ymdh_time_e, 3)
         with Progress() as progress:
-            task = progress.add_task(
-                "[cyan]Downloading...", total=len(time_list))
+            task = progress.add_task("[cyan]Downloading...", total=len(time_list))
             if num_workers is None or num_workers <= 1:
                 # serial mode
                 for time_str in time_list:
-                    direct_download(var, lon_min, lon_max, lat_min, lat_max,
-                                    time_str, depth, level, store_path, dataset_name, version_name, check)
+                    direct_download(var, lon_min, lon_max, lat_min, lat_max, time_str, depth, level, store_path, dataset_name, version_name, check)
                     progress.update(task, advance=1)
             else:
                 # parallel mode
                 if num_workers > 10:
                     print('The number of workers is too large!')
-                    print(
-                        'In order to avoid the server being blocked, the number of workers is set to 10')
+                    print('In order to avoid the server being blocked, the number of workers is set to 10')
                     num_workers = 10
                 with ThreadPoolExecutor(max_workers=num_workers) as executor:
-                    futures = [executor.submit(download_task, var, time_str, lon_min, lon_max, lat_min, lat_max,
-                                               depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
+                    futures = [executor.submit(download_task, var, time_str, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, check) for time_str in time_list]
                     for future in futures:
                         future.add_done_callback(
                             lambda _: progress.update(task, advance=1))
```
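Most hunks here are also reflow; the only semantic tweaks are the trailing commas added inside the `query_dict` literals. To make the long joined `get_submit_url` line easier to follow, here is an illustrative sketch of the same URL assembly; the base URL is one of the THREDDS NCSS endpoints hard-coded in the module, but the query values below are placeholders for illustration, not a verified request:

```python
# Sketch of the query-string assembly performed by the joined get_submit_url line.
# The base URL comes from the module's url table; the parameter values are made up.
base_url = r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?'
query_dict = {
    'var': ['water_temp'],        # get_submit_url wraps the single var in a list first
    'north': 10, 'west': 105, 'east': 130, 'south': 0,
    'horizStride': 1,
    'time': '2015-06-01T00:00:00Z',
    'addLatLon': 'true',
    'accept': 'netcdf4',
}
# var entries first, then every remaining key=value pair, all '&'-joined
target_url = base_url + '&'.join(f"var={v}" for v in query_dict['var']) \
    + '&' + '&'.join(f"{k}={v}" for k, v in query_dict.items() if k != 'var')
print(target_url)
```

Keeping `var` separate lets the builder emit repeated `var=` parameters if more than one variable were ever requested, while the rest of the dict maps one-to-one onto NCSS query parameters.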
oafuncs/oa_down/refs_pdf.py
CHANGED
```diff
@@ -4,8 +4,8 @@
 Author: Liu Kun && 16031215@qq.com
 Date: 2024-11-09 13:58:28
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime: 2024-11-
-FilePath: \\Python\\My_Funcs\\OAFuncs\\
+LastEditTime: 2024-11-21 13:18:18
+FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\refs_pdf.py
 Description:
 EditPlatform: vscode
 ComputerInfo: XPS 15 9510
@@ -41,7 +41,7 @@ def _get_file_size(file_path, unit='KB'):
         'TB': 1024**4,
         'GB': 1024**3,
         'MB': 1024**2,
-        'KB': 1024
+        'KB': 1024,
     }
 
     # check that the given unit is valid
@@ -178,14 +178,11 @@ class _Downloader:
                 self.pdf_url = got_url
                 print(f"URL: {self.pdf_url}")
             else:
-                print(f'[bold #AFEEEE]The website {
-                    self.url_list[self.url_index]} do not inlcude the PDF file.')
+                print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
                 self.try_times = self.try_times_each_url_max+1
         else:
-            print(f"Failed to retrieve the webpage. Status code: {
-                response.status_code}")
-            print(f'[bold #AFEEEE]The website {
-                self.url_list[self.url_index]} do not inlcude the PDF file.')
+            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
+            print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
             self.try_times = self.try_times_each_url_max+1
 
     def url_iterate(self):
@@ -211,12 +208,10 @@ class _Downloader:
             if fsize < self.check_size:
                 # delete the wrong file
                 os.remove(self.fpath)
-                print(f"[bold yellow]The PDF file {
-                    self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
+                print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
             else:
                 print('[bold #E6E6FA]-'*100)
-                print(f"[bold purple]The PDF file {
-                    self.fpath} already exists.")
+                print(f"[bold purple]The PDF file {self.fpath} already exists.")
                 return
         self.url_index = 0
         already_downloaded = False
@@ -242,8 +237,7 @@ class _Downloader:
             return
         print(f"Downloading: {self.fname}...")
         try:
-            response = requests.get(
-                self.pdf_url, headers=self.headers, cookies=self.cookies)
+            response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
             if response.status_code == 200:
                 with open(self.fpath, 'wb') as f:
                     f.write(response.content)
@@ -251,18 +245,14 @@ class _Downloader:
                 if fsize < self.check_size:
                     # delete the wrong file
                     os.remove(self.fpath)
-                    print(f"[bold yellow]The PDF file {
-                        self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
+                    print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
                 else:
-                    print(f"[bold green]Sucessful to download {
-                        self.fpath}")
+                    print(f"[bold green]Sucessful to download {self.fpath}")
                     already_downloaded = True
             else:
                 self.try_times = self.try_times_each_url_max+1
-                print(f"Failed to download the PDF file. Status code: {
-                    response.status_code}")
-                print(f'[bold #AFEEEE]The website {
-                    self.url_list[self.url_index]} do not inlcude the PDF file.')
+                print(f"Failed to download the PDF file. Status code: {response.status_code}")
+                print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
         except Exception as e:
             print(f"Failed to download the PDF file. Error: {e}")
         time.sleep(self.sleep)
@@ -273,8 +263,7 @@ class _Downloader:
             self.write_wrong_record()
             return
         if self.try_times == self.try_times_each_url_max:
-            print(f'Tried {self.try_times} times for {
-                self.url_list[self.url_index-1]}.')
+            print(f'Tried {self.try_times} times for {self.url_list[self.url_index-1]}.')
             print("Try another URL...")
```
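The `_get_file_size` hunk only adds a trailing comma, but the unit table it touches is the piece the downloader's size check relies on. The helper below is a hypothetical stand-in (named `file_size`, not the package's private `_get_file_size`, whose exact behavior beyond the divisor table is not shown in this diff), assuming only that the table is applied to `os.path.getsize`, which returns a size in bytes:

```python
# Hedged sketch of the unit table shown in the diff, applied to os.path.getsize.
# file_size and its error handling are illustrative, not the package's API.
import os

def file_size(file_path, unit='KB'):
    unit_divisors = {
        'TB': 1024**4,
        'GB': 1024**3,
        'MB': 1024**2,
        'KB': 1024,
    }
    if unit not in unit_divisors:  # check that the given unit is valid
        raise ValueError(f"Unsupported unit: {unit}")
    return os.path.getsize(file_path) / unit_divisors[unit]

print(f"{file_size(__file__, 'KB'):.2f} KB")
```

A file reported well below the threshold (the module's `check_size`) is treated as a failed download and deleted for retry, which is exactly the branch the reflowed `[bold yellow]` messages above belong to.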