oafuncs 0.0.89__py2.py3-none-any.whl → 0.0.90__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/data_store/OAFuncs.png +0 -0
- oafuncs/oa_down/hycom_3hourly.py +213 -345
- oafuncs/oa_down/user_agent.py +0 -3
- oafuncs/oa_file.py +22 -7
- oafuncs/oa_help.py +2 -1
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/METADATA +1 -1
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/RECORD +10 -10
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/top_level.txt +0 -0
oafuncs/data_store/OAFuncs.png
CHANGED
Binary file
oafuncs/oa_down/hycom_3hourly.py
CHANGED
@@ -2,9 +2,9 @@
 # coding=utf-8
 """
 Author: Liu Kun && 16031215@qq.com
-Date: 2024-11-
+Date: 2024-11-02 11:07:49
 LastEditors: Liu Kun && 16031215@qq.com
-LastEditTime:
+LastEditTime: 2025-01-07 16:31:36
 FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
 Description:
 EditPlatform: vscode
@@ -16,182 +16,188 @@ Python Version: 3.12
 import datetime
 import os
 import random
+import re
 import time
 import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 from threading import Lock
-import re
 
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 import requests
-from bs4 import BeautifulSoup
 from rich import print
 from rich.progress import Progress
-
+
+from oafuncs.oa_down.user_agent import get_ua
+from oafuncs.oa_file import file_size, mean_size
 
 warnings.filterwarnings("ignore", category=RuntimeWarning, message="Engine '.*' loading failed:.*")
 
 __all__ = ["draw_time_range", "download", "how_to_use", "get_time_list"]
 
-[old lines 39-194 removed: the same variable/data info tables, previously built at module level; their content is truncated in this diff view]
+
+def get_initial_data():
+    global variable_info, data_info, var_group, single_var_group
+    # ----------------------------------------------
+    # variable
+    variable_info = {
+        "u": {"var_name": "water_u", "standard_name": "eastward_sea_water_velocity"},
+        "v": {"var_name": "water_v", "standard_name": "northward_sea_water_velocity"},
+        "temp": {"var_name": "water_temp", "standard_name": "sea_water_potential_temperature"},
+        "salt": {"var_name": "salinity", "standard_name": "sea_water_salinity"},
+        "ssh": {"var_name": "surf_el", "standard_name": "sea_surface_elevation"},
+        "u_b": {"var_name": "water_u_bottom", "standard_name": "eastward_sea_water_velocity_at_sea_floor"},
+        "v_b": {"var_name": "water_v_bottom", "standard_name": "northward_sea_water_velocity_at_sea_floor"},
+        "temp_b": {"var_name": "water_temp_bottom", "standard_name": "sea_water_potential_temperature_at_sea_floor"},
+        "salt_b": {"var_name": "salinity_bottom", "standard_name": "sea_water_salinity_at_sea_floor"},
+    }
+    # ----------------------------------------------
+    # time resolution
+    data_info = {"yearly": {}, "monthly": {}, "daily": {}, "hourly": {}}
+
+    # hourly data
+    # dataset: GLBv0.08, GLBu0.08, GLBy0.08
+    data_info["hourly"]["dataset"] = {"GLBv0.08": {}, "GLBu0.08": {}, "GLBy0.08": {}, "ESPC_D": {}}
+
+    # version
+    # version of GLBv0.08: 53.X, 56.3, 57.2, 92.8, 57.7, 92.9, 93.0
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"] = {"53.X": {}, "56.3": {}, "57.2": {}, "92.8": {}, "57.7": {}, "92.9": {}, "93.0": {}}
+    # version of GLBu0.08: 93.0
+    data_info["hourly"]["dataset"]["GLBu0.08"]["version"] = {"93.0": {}}
+    # version of GLBy0.08: 93.0
+    data_info["hourly"]["dataset"]["GLBy0.08"]["version"] = {"93.0": {}}
+    # version of ESPC_D: V02
+    data_info["hourly"]["dataset"]["ESPC_D"]["version"] = {"V02": {}}
+
+    # info details
+    # time range
+    # GLBv0.08
+    # Submitting an out-of-range time on the website returns the dataset's actual time range, which was used to correct the ranges below
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["time_range"] = {"time_start": "1994010112", "time_end": "2015123109"}
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["time_range"] = {"time_start": "2014070112", "time_end": "2016093009"}
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["time_range"] = {"time_start": "2016050112", "time_end": "2017020109"}
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["time_range"] = {"time_start": "2017020112", "time_end": "2017060109"}
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["time_range"] = {"time_start": "2017060112", "time_end": "2017100109"}
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["time_range"] = {"time_start": "2017100112", "time_end": "2018032009"}
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018010112", "time_end": "2020021909"}
+    # GLBu0.08
+    data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018091912", "time_end": "2018120909"}
+    # GLBy0.08
+    data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["time_range"] = {"time_start": "2018120412", "time_end": "2024090509"}
+    # ESPC-D
+    data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["time_range"] = {"time_start": "2024081012", "time_end": "2030010100"}
+
+    # classification method
+    # year_different: the data of different years is stored in different files
+    # same_path: the data of different years is stored in the same file
+    # var_different: the data of different variables is stored in different files
+    # var_year_different: the data of different variables and years is stored in different files
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["classification"] = "year_different"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["classification"] = "same_path"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["classification"] = "same_path"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["classification"] = "var_different"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["classification"] = "same_path"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["classification"] = "var_different"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["classification"] = "var_different"
+    data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["classification"] = "var_different"
+    data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["classification"] = "var_year_different"
+    data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["classification"] = "single_var_year_different"
+
+    # download info
+    # base url
+    # GLBv0.08 53.X
+    url_53x = {}
+    for y_53x in range(1994, 2016):
+        # r'https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/2013?'
+        url_53x[str(y_53x)] = rf"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_53.X/data/{y_53x}?"
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["53.X"]["url"] = url_53x
+    # GLBv0.08 56.3
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["56.3"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_56.3?"
+    # GLBv0.08 57.2
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.2"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.2?"
+    # GLBv0.08 92.8
+    url_928 = {
+        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/uv3z?",
+        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ts3z?",
+        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.8/ssh?",
+    }
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.8"]["url"] = url_928
+    # GLBv0.08 57.7
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["57.7"]["url"] = r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_57.7?"
+    # GLBv0.08 92.9
+    url_929 = {
+        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/uv3z?",
+        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ts3z?",
+        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_92.9/ssh?",
+    }
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["92.9"]["url"] = url_929
+    # GLBv0.08 93.0
+    url_930_v = {
+        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/uv3z?",
+        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ts3z?",
+        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBv0.08/expt_93.0/ssh?",
+    }
+    data_info["hourly"]["dataset"]["GLBv0.08"]["version"]["93.0"]["url"] = url_930_v
+    # GLBu0.08 93.0
+    url_930_u = {
+        "uv3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/uv3z?",
+        "ts3z": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ts3z?",
+        "ssh": r"https://ncss.hycom.org/thredds/ncss/GLBu0.08/expt_93.0/ssh?",
+    }
+    data_info["hourly"]["dataset"]["GLBu0.08"]["version"]["93.0"]["url"] = url_930_u
+    # GLBy0.08 93.0
+    uv3z_930_y = {}
+    ts3z_930_y = {}
+    ssh_930_y = {}
+    for y_930_y in range(2018, 2025):
+        uv3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/uv3z/{y_930_y}?"
+        ts3z_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ts3z/{y_930_y}?"
+        ssh_930_y[str(y_930_y)] = rf"https://ncss.hycom.org/thredds/ncss/GLBy0.08/expt_93.0/ssh/{y_930_y}?"
+    # GLBy0.08 93.0 data time range in each year: year-01-01 12:00 to (year+1)-01-01 09:00
+    url_930_y = {
+        "uv3z": uv3z_930_y,
+        "ts3z": ts3z_930_y,
+        "ssh": ssh_930_y,
+    }
+    data_info["hourly"]["dataset"]["GLBy0.08"]["version"]["93.0"]["url"] = url_930_y
+    # ESPC-D-V02
+    u3z_espc_d_v02_y = {}
+    v3z_espc_d_v02_y = {}
+    t3z_espc_d_v02_y = {}
+    s3z_espc_d_v02_y = {}
+    ssh_espc_d_v02_y = {}
+    for y_espc_d_v02 in range(2024, 2030):
+        u3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/u3z/{y_espc_d_v02}?"
+        v3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/v3z/{y_espc_d_v02}?"
+        t3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/t3z/{y_espc_d_v02}?"
+        s3z_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/s3z/{y_espc_d_v02}?"
+        ssh_espc_d_v02_y[str(y_espc_d_v02)] = rf"https://ncss.hycom.org/thredds/ncss/ESPC-D-V02/ssh/{y_espc_d_v02}?"
+    url_espc_d_v02_y = {
+        "u3z": u3z_espc_d_v02_y,
+        "v3z": v3z_espc_d_v02_y,
+        "t3z": t3z_espc_d_v02_y,
+        "s3z": s3z_espc_d_v02_y,
+        "ssh": ssh_espc_d_v02_y,
+    }
+    data_info["hourly"]["dataset"]["ESPC_D"]["version"]["V02"]["url"] = url_espc_d_v02_y
+    # ----------------------------------------------
+    var_group = {
+        "uv3z": ["u", "v", "u_b", "v_b"],
+        "ts3z": ["temp", "salt", "temp_b", "salt_b"],
+        "ssh": ["ssh"],
+    }
+    # ----------------------------------------------
+    single_var_group = {
+        "u3z": ["u"],
+        "v3z": ["v"],
+        "t3z": ["temp"],
+        "s3z": ["salt"],
+        "ssh": ["ssh"],
+    }
+
+    return variable_info, data_info, var_group, single_var_group
 
 
 def draw_time_range(pic_save_folder=None):
@@ -556,79 +562,6 @@ def clear_existing_file(file_full_path):
         print(f"{file_full_path} has been removed")
 
 
-def find_file(parent_path, fname, mode="path"):
-    """
-    description:
-    param {*} parent_path: The parent path where the files are located
-    param {*} fname: The file name pattern to search for
-    param {*} mode: 'path' to return the full path of the files, 'file' to return only the file names
-    return {*} A list of file paths or file names if files are found, None otherwise
-    """
-
-    def natural_sort_key(s):
-        """Generate a key for natural sorting"""
-        return [int(text) if text.isdigit() else text.lower() for text in re.split("([0-9]+)", s)]
-
-    # Combine parent_path and fname into a full search pattern
-    search_pattern = os.path.join(str(parent_path), fname)
-
-    # Use the glob module to find all matching files
-    matched_files = glob.glob(search_pattern)
-
-    # If no files were found, return None
-    if not matched_files:
-        return None
-
-    # Replaces the natsorted call in find_files
-    matched_files = sorted(matched_files, key=natural_sort_key)
-
-    # Decide what to return based on the mode parameter
-    if mode == "file":
-        # Return only the file names
-        result = [os.path.basename(file) for file in matched_files]
-    else:  # default: 'path'
-        # Return the absolute paths of the files
-        result = [os.path.abspath(file) for file in matched_files]
-
-    return result
-
-
-def file_size(file_path, unit="KB"):
-    # Check whether the file exists
-    if not os.path.exists(file_path):
-        return "File does not exist"
-
-    # Get the file size in bytes
-    file_size = os.path.getsize(file_path)
-
-    # Unit conversion dictionary
-    unit_dict = {"PB": 1024**5, "TB": 1024**4, "GB": 1024**3, "MB": 1024**2, "KB": 1024}
-
-    # Check that the requested unit is valid
-    if unit not in unit_dict:
-        return "Invalid unit; please choose one of PB, TB, GB, MB, KB"
-
-    # Convert the file size to the requested unit
-    converted_size = file_size / unit_dict[unit]
-
-    return converted_size
-
-
-# ** Calculate the average size of matching files in a folder
-def mean_size(parent_path, fname):
-    flist = find_file(parent_path, fname)
-    if flist:
-        size_list = [file_size(f) for f in flist if file_size(f) != 0]
-    else:
-        size_list = []
-    if size_list:
-        min_size, max_size = min(size_list), max(size_list)
-        mean_size = sum(size_list) / len(size_list)
-    else:
-        mean_size, min_size, max_size = 0, 0, 0
-    return mean_size, min_size, max_size
-
-
 def check_existing_file(file_full_path, min_size):
     if os.path.exists(file_full_path):
         print(f"[bold #FFA54F]{file_full_path} exists")
@@ -651,87 +584,28 @@ def check_existing_file(file_full_path, min_size):
     return False
 
 
-def get_ua():
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    ua_file_txt = os.path.join(current_dir, "User_Agent-list.txt")
-
-    with open(ua_file_txt, "r") as f:
-        ua_list = f.readlines()
-        # Strip newlines and drop empty lines
-        ua_list = [line.strip() for line in ua_list if line.strip()]
-
-    # if current_platform == 'Linux':
-    #     ua_list = [line for line in ua_list if 'Linux' in line]
-
-    return random.choice(ua_list)
-
-
-def get_proxy_file():
-    # Get the absolute path of the current script
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    # Build the absolute path of ip.txt
-    ip_file_txt = os.path.join(script_dir, "ip.txt")
-    with open(ip_file_txt, "r") as f:
-        ips = f.readlines()
-    ip_list = []
-    for ip in ips:
-        ip_list.append(ip.strip())
-    choose_ip = random.choice(ip_list)
-    proxies = {"http": "http://" + choose_ip, "https": "https://" + choose_ip}
-    # print(f'Using proxy: {proxies}')
-    return proxies
-
-
-def scrape_and_categorize_proxies(choose_protocol="http"):
-    url = "https://topproxylinks.com/"
-    # Send an HTTP request to fetch the page
-    response = requests.get(url)
-    # Parse the page with BeautifulSoup
-    soup = BeautifulSoup(response.text, "html.parser")
-
-    # Initialize a dict to store proxies by protocol
-    proxies_dict = {"http": [], "socks4": [], "socks5": []}
-
-    # Find all rows in the table
-    tbody = soup.find("tbody")
-
-    if tbody:
-        for row in tbody.find_all("tr"):
-            # Extract the protocol, proxy and country cells
-            cells = row.find_all("td")
-            protocol = cells[0].text.strip().lower()
-            proxy = cells[1].text.strip()
-
-            # Store the proxy under its protocol
-            if protocol in proxies_dict:
-                proxies_dict[protocol].append(proxy)
-
-    if choose_protocol in proxies_dict:
-        proxies_list = proxies_dict[choose_protocol]
-    else:
-        proxies_list = proxies_dict["http"]
-
-    return proxies_list
-
-
-def get_proxy():
-    ip_list = scrape_and_categorize_proxies(choose_protocol="http")
-    choose_ip = random.choice(ip_list)
-    proxies = {"http": f"http://{choose_ip}", "https": f"http://{choose_ip}"}
-    print(f"Using proxy: {proxies}")
-    return proxies
-
-
 def download_file(target_url, store_path, file_name, check=False):
     # Check if the file exists
     fname = Path(store_path) / file_name
     file_name_split = file_name.split("_")
-
-
-
-
+    file_name_split = file_name_split[:-1]
+    # same_file = f"{file_name_split[0]}_{file_name_split[1]}*nc"
+    same_file = "_".join(file_name_split) + "*nc"
+
+    if same_file not in fsize_dict.keys():
+        # print(f'Same file name: {same_file}')
+        fsize_dict[same_file] = {"size": 0, "count": 0}
+
+    if fsize_dict[same_file]["count"] < 30 or fsize_dict[same_file]["size"] == 0:
+        # Update the minimum size over the first 30 files; after that it is taken as representative of all such files and no longer updated, to save time
+        fsize_mean = mean_size(store_path, same_file, max_num=30)
+        set_min_size = fsize_mean * 0.8
+        fsize_dict[same_file]["size"] = set_min_size
+        fsize_dict[same_file]["count"] += 1
+    else:
+        set_min_size = fsize_dict[same_file]["size"]
     if check:
-        if check_existing_file(fname, set_min_size
+        if check_existing_file(fname, set_min_size):
             count_dict["skip"] += 1
             return
     clear_existing_file(fname)
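
The new bookkeeping derives one glob pattern per variable from the file name, then caches 0.8 × the mean size of up to 30 matching files as the minimum acceptable size. A small sketch of the pattern logic (the file name is illustrative), which also shows why the ranged file names join their two times with a hyphen:

    # Pattern derivation used to group files of the same variable
    file_name = "HYCOM_water_u_2024101012-2024101018.nc"
    parts = file_name.split("_")[:-1]    # ['HYCOM', 'water', 'u'] - the time segment is dropped
    same_file = "_".join(parts) + "*nc"  # 'HYCOM_water_u*nc', the glob passed to mean_size
    # Had the times been joined with an underscore, split("_") would yield an
    # extra segment and the glob would become 'HYCOM_water_u_2024101012*nc',
    # so ranged files would no longer be grouped with their siblings.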
@@ -744,13 +618,11 @@ def download_file(target_url, store_path, file_name, check=False):
     request_times = 0
 
     def calculate_wait_time(time_str, target_url):
-        import re
-
         # Regular expression matching times in YYYYMMDDHH format
         time_pattern = r"\d{10}"
 
         # Two example strings
-        # str1 = '
+        # str1 = 'HYCOM_water_u_2018010100-2018010112.nc'
         # str2 = 'HYCOM_water_u_2018010100.nc'
 
         # Use the regex to find the times
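
For reference, that r"\d{10}" pattern pulls the YYYYMMDDHH stamps straight out of both file-name styles named in the comments; re is now imported once at module level (see the import hunk above):

    import re

    time_pattern = r"\d{10}"  # YYYYMMDDHH
    re.findall(time_pattern, "HYCOM_water_u_2018010100-2018010112.nc")
    # -> ['2018010100', '2018010112']  (ranged names carry two stamps)
    re.findall(time_pattern, "HYCOM_water_u_2018010100.nc")
    # -> ['2018010100']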
@@ -796,13 +668,8 @@ def download_file(target_url, store_path, file_name, check=False):
                 with open(filename, 'wb') as f:
                     f.write(response.content) """
 
-
-                proxies = get_proxy()
-                response = s.get(target_url, headers=headers, proxies=proxies, stream=True, timeout=random.randint(5, max_timeout))
-            else:
-                response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
+            response = s.get(target_url, headers=headers, stream=True, timeout=random.randint(5, max_timeout))  # enable streaming
             response.raise_for_status()  # raise HTTPError if the request does not return 200
-
             # Save the file
             with open(fname, "wb") as f:
                 print(f"[bold #96cbd7]Downloading {file_name}...")
@@ -923,7 +790,7 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
         submit_url = get_submit_url_var(var, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
         file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}.nc"
         if download_time_end is not None:
-            file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}
+            file_name = f"HYCOM_{variable_info[var]['var_name']}_{download_time}-{download_time_end}.nc"  # the times must not be joined with an underscore, or later searches for files of the same variable will fail
         download_file(submit_url, store_path, file_name, check)
     else:
         if download_time < "2024081012":
@@ -948,14 +815,14 @@ def prepare_url_to_download(var, lon_min=0, lon_max=359.92, lat_min=-80, lat_max
             # file_name = f'HYCOM_{'-'.join([variable_info[v]["var_name"] for v in current_group])}_{download_time}.nc'
             file_name = f"HYCOM_{key}_{download_time}.nc"
             if download_time_end is not None:
-                file_name = f"HYCOM_{key}_{download_time}
+                file_name = f"HYCOM_{key}_{download_time}-{download_time_end}.nc"  # the times must not be joined with an underscore, or later searches for files of the same variable will fail
             download_file(submit_url, store_path, file_name, check)
         else:
             for v in var:
                 submit_url = get_submit_url_var(v, depth, level_num, lon_min, lon_max, lat_min, lat_max, dataset_name, version_name, download_time, download_time_end)
                 file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}.nc"
                 if download_time_end is not None:
-                    file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}
+                    file_name = f"HYCOM_{variable_info[v]['var_name']}_{download_time}-{download_time_end}.nc"
                 download_file(submit_url, store_path, file_name, check)
 
@@ -1073,28 +940,30 @@ def download_hourly_func(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min
 def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, lat_max=90, depth=None, level=None, store_path=None, dataset_name=None, version_name=None, num_workers=None, check=False, ftimes=1):
     """
     Description:
-
+        Download the data of a single time or of a series of times
 
     Parameters:
-        [fifteen old parameter lines removed; their content is truncated in this diff view]
+        var: str or list, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b', or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
+        time_s: str, the start time, such as '2024110100' or '20241101'; if an hour is included, it must be 00, 03, 06, 09, 12, 15, 18 or 21
+        time_e: str, the end time, such as '2024110221' or '20241102'; if an hour is included, it must be 00, 03, 06, 09, 12, 15, 18 or 21; default is None, in which case (or when equal to time_s) only a single time is downloaded
+        lon_min: float, the minimum longitude, default is 0
+        lon_max: float, the maximum longitude, default is 359.92
+        lat_min: float, the minimum latitude, default is -80
+        lat_max: float, the maximum latitude, default is 90
+        depth: float, the depth, default is None; set it to download a single depth, preferably within [0, 5000]
+        level: int, the level number, default is None; set it to download a single level, preferably within [1, 40]
+        store_path: str, the path to store the data, default is None; if not set, the data is stored in the current working directory
+        dataset_name: str, the dataset name, default is None, e.g. 'GLBv0.08', 'GLBu0.08', 'GLBy0.08'; if not set, the dataset is chosen according to the download time
+        version_name: str, the version name, default is None, e.g. '53.X', '56.3'; if not set, the version is chosen according to the download time
+        num_workers: int, the number of workers, default is None (a single worker); avoid setting it too large
+        check: bool, whether to check existing files, default is False; if True, an existing file is verified and not downloaded again, otherwise it is overwritten
+        ftimes: int, the number of times per file, default is 1 (one time per file); the maximum is 8 (eight times in one file)
 
     Returns:
-
+        None
     """
+    get_initial_data()
+
     # Print the info and handle the dataset and version names
     if dataset_name is None and version_name is None:
         print("The dataset_name and version_name are None, so the dataset and version will be chosen according to the download_time.\nIf there is more than one dataset and version in the time range, the first one will be chosen.")
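
Put together, a typical call against the documented parameters looks like the sketch below (region, times and path are illustrative, borrowed from the __main__ block later in this file):

    from oafuncs.oa_down.hycom_3hourly import download

    # 3-hourly u/v over a lon/lat box; check=True skips files that already
    # exist and pass the size check described above.
    download(
        var=["u", "v"],
        time_s="2024101012",
        time_e="2024101018",
        lon_min=105, lon_max=130,
        lat_min=15, lat_max=45,
        store_path=r"G:\Data\HYCOM\3hourly_test",
        num_workers=2,
        check=True,
    )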
@@ -1154,8 +1023,8 @@ def download(var, time_s, time_e=None, lon_min=0, lon_max=359.92, lat_min=-80, l
     """ global current_platform
     current_platform = platform.system() """
 
-    global
-
+    global fsize_dict
+    fsize_dict = {}
 
     download_hourly_func(var, time_s, time_e, lon_min, lon_max, lat_min, lat_max, depth, level, store_path, dataset_name, version_name, num_workers, check, ftimes)
 
@@ -1225,8 +1094,7 @@ def how_to_use():
 
 
 if __name__ == "__main__":
-
-    time_s, time_e = "2024081012", "2024081115"
+    time_s, time_e = "2024101012", "2024101018"
     merge_name = f"{time_s}_{time_e}"  # name of the merged file
     root_path = r"G:\Data\HYCOM\3hourly_test"
    location_dict = {"west": 105, "east": 130, "south": 15, "north": 45}
@@ -1256,7 +1124,7 @@ if __name__ == "__main__":
     download_switch, single_var = True, False
     combine_switch = False
     copy_switch, copy_dir = False, r"G:\Data\HYCOM\3hourly"
-
+
     # draw_time_range(pic_save_folder=r'I:\Delete')
 
     if download_switch:
@@ -1288,6 +1156,6 @@ if __name__ == "__main__":
         merge_path_name = Path(root_path)/f'HYCOM_{fname}_{merge_name}.nc'
         if combine_switch:
             # var_name here must be the official variable name; abbreviations are no longer accepted
-
+            merge(file_list, var_name, 'time', merge_path_name)
         if copy_switch:
             copy_file(merge_path_name, copy_dir) """
oafuncs/oa_down/user_agent.py
CHANGED
oafuncs/oa_file.py
CHANGED
@@ -326,16 +326,31 @@ def file_size(file_path, unit="KB"):
|
|
326
326
|
|
327
327
|
|
328
328
|
# ** 计算文件夹下指定相关文件的平均大小
|
329
|
-
def mean_size(parent_path,fname):
|
329
|
+
def mean_size(parent_path,fname,max_num=None,unit="KB"):
|
330
|
+
"""
|
331
|
+
Description:
|
332
|
+
Calculate the average size of the specified related files in the folder
|
333
|
+
|
334
|
+
Parameters:
|
335
|
+
parent_path: The parent path where the files are located
|
336
|
+
fname: The file name pattern to search for
|
337
|
+
max_num: The maximum number of files to search for
|
338
|
+
unit: The unit of the file size, default is "KB"
|
339
|
+
|
340
|
+
Returns:
|
341
|
+
The average size
|
342
|
+
"""
|
330
343
|
flist = find_file(parent_path, fname)
|
331
344
|
if flist:
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
345
|
+
if max_num:
|
346
|
+
flist = flist[:int(max_num)]
|
347
|
+
size_list = [file_size(f,unit) for f in flist if file_size(f,unit) > 0]
|
348
|
+
if size_list:
|
349
|
+
return sum(size_list) / len(size_list)
|
350
|
+
else:
|
351
|
+
return 0.0
|
336
352
|
else:
|
337
|
-
|
338
|
-
return mean_size, min_size, max_size
|
353
|
+
return 0.0
|
339
354
|
|
340
355
|
|
341
356
|
if __name__ == "__main__":
|
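
After this change mean_size returns a single float instead of the old (mean, min, max) tuple, which matches how hycom_3hourly.py now calls it with max_num=30. A hedged usage sketch (directory and pattern are illustrative):

    from oafuncs.oa_file import mean_size

    # Average size, in MB, of at most 30 matching files; 0.0 if none match.
    avg_mb = mean_size(r"G:\Data\HYCOM\3hourly_test", "HYCOM_water_u*nc", max_num=30, unit="MB")
    print(f"average file size: {avg_mb:.1f} MB")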
oafuncs/oa_help.py
CHANGED
{oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/RECORD
CHANGED
@@ -2,25 +2,25 @@ oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
 oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
 oafuncs/oa_data.py,sha256=21HC_7GVFAtU9AMYKGSSzY9J6_0Ju-5n8dJKwOOx5HI,15641
 oafuncs/oa_draw.py,sha256=QypQp4vJIrbAyFddEVxd9K9Q4d85PRYqYQi9xDUmSZw,11150
-oafuncs/oa_file.py,sha256=
-oafuncs/oa_help.py,sha256=
+oafuncs/oa_file.py,sha256=9b2uXTOqJqds5IhEqA_702G-qzyCZiguGY5JcT9CZ78,12728
+oafuncs/oa_help.py,sha256=42xvmv6BSTyrKfQtW0bvedyv6ElhFJLMblq5jhziuB4,4076
 oafuncs/oa_nc.py,sha256=m_80xWzoyY2niupfpTSvej1D_k4WvTnDYlnlYbIfqGI,17525
 oafuncs/oa_python.py,sha256=Q-6UGGw_dJff7Ef8i87fsLPoGeHV5jBzfb-7HP4THR0,4018
-oafuncs/data_store/OAFuncs.png,sha256=
+oafuncs/data_store/OAFuncs.png,sha256=HZORbnBSRX0MZSLTGAZAPK24RBUTmihguMeG9YiU_So,3261697
 oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
 oafuncs/oa_down/__init__.py,sha256=pKPqxD0z09NEXWCemuemfgTct7Kcu3APPJqqB1FPXRM,565
-oafuncs/oa_down/hycom_3hourly.py,sha256=
+oafuncs/oa_down/hycom_3hourly.py,sha256=Bt4MjcshhAyDckfFvdqxjNvzU7JuBVYCwvY8b1OPbPw,59501
 oafuncs/oa_down/literature.py,sha256=Txv1YGSG-Z7m4o7FGHvXOR40EFxYozMsyM0-gy5CMEg,10086
 oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
-oafuncs/oa_down/user_agent.py,sha256=
+oafuncs/oa_down/user_agent.py,sha256=TsPcAxFmMTYAEHRFjurI1bQBJfDhcA70MdHoUPwQmks,785
 oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
 oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
 oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
 oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
 oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
 oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
-oafuncs-0.0.
-oafuncs-0.0.
-oafuncs-0.0.
-oafuncs-0.0.
-oafuncs-0.0.
+oafuncs-0.0.90.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
+oafuncs-0.0.90.dist-info/METADATA,sha256=s3X6lHw6yv20rd2528K-5cOk7zcYRGSIGYEg4SeIqqI,3321
+oafuncs-0.0.90.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
+oafuncs-0.0.90.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
+oafuncs-0.0.90.dist-info/RECORD,,
{oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/LICENSE.txt
File without changes
{oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/WHEEL
File without changes
{oafuncs-0.0.89.dist-info → oafuncs-0.0.90.dist-info}/top_level.txt
File without changes