oafuncs 0.0.76__py2.py3-none-any.whl → 0.0.77__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/oa_cmap.py +73 -24
- oafuncs/oa_down/User_Agent-list.txt +59 -12
- oafuncs/oa_down/__init__.py +2 -3
- oafuncs/oa_down/test.py +24 -1
- oafuncs/oa_file.py +40 -4
- oafuncs/oa_nc.py +98 -36
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.77.dist-info}/METADATA +9 -6
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.77.dist-info}/RECORD +11 -12
- oafuncs/oa_down/refs_pdf.py +0 -338
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.77.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.77.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.76.dist-info → oafuncs-0.0.77.dist-info}/top_level.txt +0 -0
oafuncs/oa_cmap.py
CHANGED
@@ -1,24 +1,23 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# coding=utf-8
|
3
|
-
|
3
|
+
"""
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-09-17 16:55:11
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
7
|
LastEditTime: 2024-11-21 13:14:24
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_cmap.py
|
9
|
-
Description:
|
9
|
+
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
12
12
|
SystemInfo: Windows 11
|
13
13
|
Python Version: 3.11
|
14
|
-
|
15
|
-
|
14
|
+
"""
|
16
15
|
|
17
16
|
import matplotlib as mpl
|
18
17
|
import matplotlib.pyplot as plt
|
19
18
|
import numpy as np
|
20
19
|
|
21
|
-
__all__ = [
|
20
|
+
__all__ = ["show", "extract_colors", "create_custom", "create_diverging", "create_5rgb_txt", "my_cmap"]
|
22
21
|
|
23
22
|
# ** 将cmap用填色图可视化(官网摘抄函数)
|
24
23
|
|
@@ -42,20 +41,20 @@ def show(colormaps: list):
|
|
42
41
|
|
43
42
|
# ** 将cmap转为list,即多个颜色的列表
|
44
43
|
def extract_colors(cmap, n=256):
|
45
|
-
|
44
|
+
"""
|
46
45
|
cmap : cmap名称
|
47
46
|
n : 提取颜色数量
|
48
47
|
return : 提取的颜色列表
|
49
48
|
example : out_cmap = extract_colors('viridis', 256)
|
50
|
-
|
49
|
+
"""
|
51
50
|
c_map = mpl.colormaps.get_cmap(cmap)
|
52
51
|
out_cmap = [c_map(i) for i in np.linspace(0, 1, n)]
|
53
52
|
return out_cmap
|
54
53
|
|
55
54
|
|
56
55
|
# ** 自制cmap,多色,可带位置
|
57
|
-
def create_custom(colors: list, nodes=None): # 利用颜色快速配色
|
58
|
-
|
56
|
+
def create_custom(colors: list, nodes=None, under=None, over=None): # 利用颜色快速配色
|
57
|
+
"""
|
59
58
|
func : 自制cmap,自动确定颜色位置(等比例)
|
60
59
|
description : colors可以是颜色名称,也可以是十六进制颜色代码
|
61
60
|
param {*} colors 颜色
|
@@ -63,62 +62,112 @@ def create_custom(colors: list, nodes=None): # 利用颜色快速配色
|
|
63
62
|
return {*} c_map
|
64
63
|
example : c_map = create_custom(['#C2B7F3','#B3BBF2','#B0CBF1','#ACDCF0','#A8EEED'])
|
65
64
|
c_map = create_custom(['aliceblue','skyblue','deepskyblue'],[0.0,0.5,1.0])
|
66
|
-
|
65
|
+
"""
|
67
66
|
if nodes is None: # 采取自动分配比例
|
68
|
-
cmap_color = mpl.colors.LinearSegmentedColormap.from_list(
|
67
|
+
cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", colors)
|
69
68
|
else: # 按照提供比例分配
|
70
69
|
cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", list(zip(nodes, colors)))
|
70
|
+
if under is not None:
|
71
|
+
cmap_color.set_under(under)
|
72
|
+
if over is not None:
|
73
|
+
cmap_color.set_over(over)
|
71
74
|
return cmap_color
|
72
75
|
|
76
|
+
|
73
77
|
# ** 自制diverging型cmap,默认中间为白色
|
74
78
|
|
75
79
|
|
76
80
|
def create_diverging(colors: list):
|
77
|
-
|
81
|
+
"""
|
78
82
|
func : 自制cmap,双色,中间默认为白色;如果输入偶数个颜色,则中间为白,如果奇数个颜色,则中间色为中间色
|
79
83
|
description : colors可以是颜色名称,也可以是十六进制颜色代码
|
80
84
|
param {*} colors
|
81
85
|
return {*}
|
82
86
|
example : diverging_cmap = create_diverging(["#00c0ff", "#a1d3ff", "#DCDCDC", "#FFD39B", "#FF8247"])
|
83
|
-
|
87
|
+
"""
|
84
88
|
# 自定义颜色位置
|
85
89
|
n = len(colors)
|
86
90
|
nodes = np.linspace(0.0, 1.0, n + 1 if n % 2 == 0 else n)
|
87
91
|
newcolors = colors
|
88
92
|
if n % 2 == 0:
|
89
|
-
newcolors.insert(int(n / 2),
|
93
|
+
newcolors.insert(int(n / 2), "#ffffff") # 偶数个颜色,中间为白色
|
90
94
|
cmap_color = mpl.colors.LinearSegmentedColormap.from_list("mycmap", list(zip(nodes, newcolors)))
|
91
95
|
return cmap_color
|
92
96
|
|
97
|
+
|
93
98
|
# ** 根据RGB的txt文档制作色卡(利用Grads调色盘)
|
94
99
|
|
95
100
|
|
96
101
|
def create_5rgb_txt(rgb_txt_filepath: str): # 根据RGB的txt文档制作色卡/根据rgb值制作
|
97
|
-
|
102
|
+
"""
|
98
103
|
func : 根据RGB的txt文档制作色卡
|
99
104
|
description : rgb_txt_filepath='E:/python/colorbar/test.txt'
|
100
105
|
param {*} rgb_txt_filepath txt文件路径
|
101
106
|
return {*} cmap
|
102
107
|
example : cmap_color = create_5rgb_txt(path)
|
103
|
-
|
108
|
+
"""
|
104
109
|
with open(rgb_txt_filepath) as fid:
|
105
110
|
data = fid.readlines()
|
106
111
|
n = len(data)
|
107
112
|
rgb = np.zeros((n, 3))
|
108
113
|
for i in np.arange(n):
|
109
|
-
rgb[i][0] = data[i].split(
|
110
|
-
rgb[i][1] = data[i].split(
|
111
|
-
rgb[i][2] = data[i].split(
|
114
|
+
rgb[i][0] = data[i].split(",")[0]
|
115
|
+
rgb[i][1] = data[i].split(",")[1]
|
116
|
+
rgb[i][2] = data[i].split(",")[2]
|
112
117
|
max_rgb = np.max(rgb)
|
113
118
|
if max_rgb > 2: # 如果rgb值大于2,则认为是0-255的值,需要归一化
|
114
119
|
rgb = rgb / 255.0
|
115
|
-
icmap = mpl.colors.ListedColormap(rgb, name=
|
120
|
+
icmap = mpl.colors.ListedColormap(rgb, name="my_color")
|
116
121
|
return icmap
|
117
122
|
|
118
123
|
|
119
|
-
|
124
|
+
def my_cmap(cmap_name=None, query=False):
|
125
|
+
"""
|
126
|
+
description: Return a colormap by name, chosen from the custom colormaps defined in this function or, failing that, from matplotlib's colormaps
|
127
|
+
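param {*} cmap_name: name of a custom colormap (e.g. "diverging_1", "cold_1", "warm_1", "colorful_1") or of a matplotlib colormap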
|
128
|
+
param {*} query: if True, print the names of the available custom colormaps
|
129
|
+
return {*} the matching colormap; raises ValueError if the name is unknown
|
130
|
+
"""
|
131
|
+
|
132
|
+
my_cmap_dict = {
|
133
|
+
"diverging_1": create_custom(["#4e00b3", "#0000FF", "#00c0ff", "#a1d3ff", "#DCDCDC", "#FFD39B", "#FF8247", "#FF0000", "#FF5F9E"]),
|
134
|
+
"cold_1": create_custom(["#4e00b3", "#0000FF", "#00c0ff", "#a1d3ff", "#DCDCDC"]),
|
135
|
+
"warm_1": create_custom(["#DCDCDC", "#FFD39B", "#FF8247", "#FF0000", "#FF5F9E"]),
|
136
|
+
# "land_1": create_custom(["#3E6436", "#678A59", "#91A176", "#B8A87D", "#D9CBB2"], under="#A6CEE3", over="#FFFFFF"), # 陆地颜色从深绿到浅棕,表示从植被到沙地的递减
|
137
|
+
# "ocean_1": create_custom(["#126697", "#2D88B3", "#4EA1C9", "#78B9D8", "#A6CEE3"], under="#8470FF", over="#3E6436"), # 海洋颜色从深蓝到浅蓝,表示从深海到浅海的递减
|
138
|
+
# "ocean_land_1": create_custom(
|
139
|
+
# [
|
140
|
+
# "#126697", # 深蓝(深海)
|
141
|
+
# "#2D88B3", # 蓝
|
142
|
+
# "#4EA1C9", # 蓝绿
|
143
|
+
# "#78B9D8", # 浅蓝(浅海)
|
144
|
+
# "#A6CEE3", # 浅蓝(近岸)
|
145
|
+
# "#AAAAAA", # 灰色(0值,海平面)
|
146
|
+
# "#D9CBB2", # 沙质土壤色(陆地开始)
|
147
|
+
# "#B8A87D", # 浅棕
|
148
|
+
# "#91A176", # 浅绿
|
149
|
+
# "#678A59", # 中绿
|
150
|
+
# "#3E6436", # 深绿(高山)
|
151
|
+
# ]
|
152
|
+
# ),
|
153
|
+
"colorful_1": create_custom(["#6d00db", "#9800cb", "#F2003C", "#ff4500", "#ff7f00", "#FE28A2", "#FFC0CB", "#DDA0DD", "#40E0D0", "#1a66f2", "#00f7fb", "#8fff88", "#E3FF00"]),
|
154
|
+
}
|
155
|
+
if query:
|
156
|
+
for key, _ in my_cmap_dict.items():
|
157
|
+
print(key)
|
158
|
+
|
159
|
+
if cmap_name in my_cmap_dict:
|
160
|
+
return my_cmap_dict[cmap_name]
|
161
|
+
else:
|
162
|
+
try:
|
163
|
+
return mpl.cm.get_cmap(cmap_name)
|
164
|
+
except ValueError:
|
165
|
+
raise ValueError(f"Unknown cmap name: {cmap_name}")
|
166
|
+
|
167
|
+
|
168
|
+
if __name__ == "__main__":
|
120
169
|
# ** 测试自制cmap
|
121
|
-
colors = [
|
170
|
+
colors = ["#C2B7F3", "#B3BBF2", "#B0CBF1", "#ACDCF0", "#A8EEED"]
|
122
171
|
nodes = [0.0, 0.2, 0.4, 0.6, 1.0]
|
123
172
|
c_map = create_custom(colors, nodes)
|
124
173
|
show([c_map])
|
@@ -128,8 +177,8 @@ if __name__ == '__main__':
|
|
128
177
|
show([diverging_cmap])
|
129
178
|
|
130
179
|
# ** 测试根据RGB的txt文档制作色卡
|
131
|
-
file_path =
|
180
|
+
file_path = "E:/python/colorbar/test.txt"
|
132
181
|
cmap_color = create_5rgb_txt(file_path)
|
133
182
|
|
134
183
|
# ** 测试将cmap转为list
|
135
|
-
out_cmap = extract_colors(
|
184
|
+
out_cmap = extract_colors("viridis", 256)
|
@@ -9442,21 +9442,9 @@ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.432
|
|
9442
9442
|
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; (R1 1.6); .NET CLR 2.0.50727; TheWorld)
|
9443
9443
|
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; SV1; TheWorld)
|
9444
9444
|
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; TheWorld)
|
9445
|
-
Webkit/1.1.8 (Linux; en_US) Uzbl
|
9446
|
-
Webkit/1.1.8 (Linux; en-us) Uzbl
|
9447
9445
|
Uzbl (X11; U; Arch Linux; de-DE) Webkit/1.1.10
|
9448
9446
|
Uzbl (X11; U; Arch Linux i686; de-DE) Webkit/1.1.10
|
9449
|
-
Vimprobable/0.9.20.5
|
9450
9447
|
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1pre) Gecko/20090629 Vonkeror/1.0
|
9451
|
-
w3m/0.52
|
9452
|
-
w3m/0.5.2 (Linux i686; it; Debian-3.0.6-3)
|
9453
|
-
w3m/0.5.2 (Linux i686; en; Debian-3.0.6-3)
|
9454
|
-
w3m/0.5.2 (Debian-3.0.6-3)
|
9455
|
-
w3m/0.5.2
|
9456
|
-
w3m/0.5.1+cvs-1.968
|
9457
|
-
w3m/0.5.1
|
9458
|
-
w3m/0.2.1
|
9459
|
-
w3m/0.1.9
|
9460
9448
|
Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/534.12 (KHTML, like Gecko) WeltweitimnetzBrowser/0.25 Safari/534.12
|
9461
9449
|
Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/533.3 (KHTML, like Gecko) WeltweitimnetzBrowser/0.25 Safari/533.3
|
9462
9450
|
Mozilla/5.0 (Windows; U; Windows NT 5.1; pt-BR) AppleWebKit/532.4 (KHTML, like Gecko) WeltweitimnetzBrowser/0.25 Safari/532.4
|
@@ -9472,3 +9460,62 @@ Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.9) Gecko/2009042
|
|
9472
9460
|
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.9) Gecko/2009042318 Firefox/3.0.9 Wyzo/3.0.2
|
9473
9461
|
Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.8.1.6) Gecko/20070801 Firefox/2.0 Wyzo/0.5.3
|
9474
9462
|
Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070801 Firefox/2.0 Wyzo/0.5.3
|
9463
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60
|
9464
|
+
Opera/8.0 (Windows NT 5.1; U; en)
|
9465
|
+
Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50
|
9466
|
+
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50
|
9467
|
+
Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11
|
9468
|
+
Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11
|
9469
|
+
Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10
|
9470
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0
|
9471
|
+
Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10
|
9472
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1
|
9473
|
+
Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1
|
9474
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2
|
9475
|
+
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36
|
9476
|
+
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50
|
9477
|
+
Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5
|
9478
|
+
Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5
|
9479
|
+
Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5
|
9480
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36
|
9481
|
+
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11
|
9482
|
+
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16
|
9483
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
|
9484
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36
|
9485
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko
|
9486
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)
|
9487
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11
|
9488
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER
|
9489
|
+
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)
|
9490
|
+
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)
|
9491
|
+
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)
|
9492
|
+
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)
|
9493
|
+
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0
|
9494
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)
|
9495
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)
|
9496
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36
|
9497
|
+
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11
|
9498
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36
|
9499
|
+
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36
|
9500
|
+
Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5
|
9501
|
+
Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5
|
9502
|
+
Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5
|
9503
|
+
Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5
|
9504
|
+
Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1
|
9505
|
+
Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1
|
9506
|
+
MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1
|
9507
|
+
Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10
|
9508
|
+
Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13
|
9509
|
+
Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+
|
9510
|
+
Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0
|
9511
|
+
Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;
|
9512
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)
|
9513
|
+
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)
|
9514
|
+
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
|
9515
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)
|
9516
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)
|
9517
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)
|
9518
|
+
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)
|
9519
|
+
Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1
|
9520
|
+
Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124
|
9521
|
+
Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)
|
oafuncs/oa_down/__init__.py
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-09-17 16:09:20
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\
|
7
|
+
LastEditTime: 2024-12-01 19:49:40
|
8
|
+
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\__init__.py
|
9
9
|
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
@@ -18,4 +18,3 @@ Python Version: 3.11
|
|
18
18
|
|
19
19
|
from .hycom_3hourly import *
|
20
20
|
from .literature import *
|
21
|
-
from .refs_pdf import * # 在2024/12/31之后删除此脚本
|
oafuncs/oa_down/test.py
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-12-01 19:32:25
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-12-01 19:
|
7
|
+
LastEditTime: 2024-12-01 19:50:32
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\test.py
|
9
9
|
Description:
|
10
10
|
EditPlatform: vscode
|
@@ -16,6 +16,24 @@ Python Version: 3.12
|
|
16
16
|
import os
|
17
17
|
import random
|
18
18
|
|
19
|
+
txtfile = r'E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\User_Agent-list.txt'
|
20
|
+
|
21
|
+
with open(txtfile, 'r') as f:
|
22
|
+
lines = f.readlines()
|
23
|
+
# 去掉换行符和空行
|
24
|
+
lines = [line.strip() for line in lines if line.strip()]
|
25
|
+
new_line = []
|
26
|
+
for i in range(len(lines)):
|
27
|
+
if '/' in lines[i]:
|
28
|
+
new_line.append(lines[i])
|
29
|
+
else:
|
30
|
+
print(lines[i])
|
31
|
+
|
32
|
+
newtxtfile = r'E:\Code\Python\My_Funcs\OAFuncs\oafuncs\oa_down\ua_list_new.txt'
|
33
|
+
""" with open(newtxtfile, 'w') as f:
|
34
|
+
for line in new_line:
|
35
|
+
f.write(line + '\n') """
|
36
|
+
|
19
37
|
|
20
38
|
def get_ua():
|
21
39
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
@@ -99,6 +117,11 @@ def get_ua_org():
|
|
99
117
|
"Openwave/UCWEB7.0.2.37/28/999",
|
100
118
|
|
101
119
|
]
|
120
|
+
with open(newtxtfile, 'w') as f:
|
121
|
+
for line in ua_list:
|
122
|
+
f.write(line + '\n')
|
102
123
|
# print(f'Using User-Agent: {ua}')
|
103
124
|
ua = random.choice(ua_list)
|
104
125
|
return ua
|
126
|
+
|
127
|
+
# get_ua_org()
|
oafuncs/oa_file.py
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-09-17 15:07:13
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-
|
7
|
+
LastEditTime: 2024-12-02 10:33:19
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_file.py
|
9
9
|
Description:
|
10
10
|
EditPlatform: vscode
|
@@ -19,7 +19,43 @@ import os
|
|
19
19
|
import re
|
20
20
|
import shutil
|
21
21
|
|
22
|
-
__all__ = ['link_file', 'copy_file', '
|
22
|
+
__all__ = ['find_file', 'link_file', 'copy_file', 'rename_file', 'make_folder', 'clear_folder', 'remove_empty_folders', 'remove', 'file_size']
|
23
|
+
|
24
|
+
|
25
|
+
def find_file(parent_path, fname, mode='path'):
|
26
|
+
'''
|
27
|
+
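description: Find all files under parent_path whose names match the glob pattern fname, returned in natural sort order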
|
28
|
+
param {*} parent_path: The parent path where the files are located
|
29
|
+
param {*} fname: The file name pattern to search for
|
30
|
+
param {*} mode: 'path' to return the full path of the files, 'file' to return only the file names
|
31
|
+
return {*} A list of file paths or file names if files are found, None otherwise
|
32
|
+
'''
|
33
|
+
def natural_sort_key(s):
|
34
|
+
"""生成一个用于自然排序的键"""
|
35
|
+
return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]
|
36
|
+
|
37
|
+
# 将parent_path和fname结合成完整的搜索路径
|
38
|
+
search_pattern = os.path.join(str(parent_path), fname)
|
39
|
+
|
40
|
+
# 使用glob模块查找所有匹配的文件
|
41
|
+
matched_files = glob.glob(search_pattern)
|
42
|
+
|
43
|
+
# 如果没有找到任何文件,则返回None
|
44
|
+
if not matched_files:
|
45
|
+
return None
|
46
|
+
|
47
|
+
# 使用natural_sort_key对匹配结果进行自然排序(替代natsorted)
|
48
|
+
matched_files = sorted(matched_files, key=natural_sort_key)
|
49
|
+
|
50
|
+
# 根据mode参数决定返回的内容
|
51
|
+
if mode == 'file':
|
52
|
+
# 只返回文件名
|
53
|
+
result = [os.path.basename(file) for file in matched_files]
|
54
|
+
else: # 默认为'path'
|
55
|
+
# 返回文件的绝对路径
|
56
|
+
result = [os.path.abspath(file) for file in matched_files]
|
57
|
+
|
58
|
+
return result
|
23
59
|
|
24
60
|
|
25
61
|
def link_file(src_pattern, dst):
|
@@ -113,14 +149,14 @@ def copy_file(src_pattern, dst):
|
|
113
149
|
print(f'复制文件或目录并重命名: {src_file} -> {dst_file}')
|
114
150
|
|
115
151
|
|
116
|
-
def
|
152
|
+
def rename_file(directory, old_str, new_str):
|
117
153
|
'''
|
118
154
|
# 描述:重命名目录下的文件,支持通配符
|
119
155
|
# 使用示例
|
120
156
|
directory_path = r"E:\windfarm\CROCO_FILES"
|
121
157
|
old_str = "croco"
|
122
158
|
new_str = "roms"
|
123
|
-
|
159
|
+
rename_file(directory_path, old_str, new_str)
|
124
160
|
param {*} directory # 目录
|
125
161
|
param {*} old_str # 要替换的字符串
|
126
162
|
param {*} new_str # 新字符串
|
oafuncs/oa_nc.py
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# coding=utf-8
|
3
|
-
|
3
|
+
"""
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-09-17 14:58:50
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-
|
7
|
+
LastEditTime: 2024-12-06 14:16:56
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_nc.py
|
9
|
-
Description:
|
9
|
+
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
12
12
|
SystemInfo: Windows 11
|
13
13
|
Python Version: 3.11
|
14
|
-
|
15
|
-
|
14
|
+
"""
|
16
15
|
|
17
16
|
import os
|
18
17
|
|
@@ -20,16 +19,16 @@ import netCDF4 as nc
|
|
20
19
|
import numpy as np
|
21
20
|
import xarray as xr
|
22
21
|
|
23
|
-
__all__ = [
|
22
|
+
__all__ = ["get_var", "extract5nc", "write2nc", "merge5nc", "modify_var_value", "modify_var_attr", "rename_var_or_dim", "check_ncfile"]
|
24
23
|
|
25
24
|
|
26
25
|
def get_var(file, *vars):
|
27
|
-
|
26
|
+
"""
|
28
27
|
description: 读取nc文件中的变量
|
29
28
|
param {file: 文件路径, *vars: 变量名}
|
30
29
|
example: datas = get_var(file_ecm, 'h', 't', 'u', 'v')
|
31
30
|
return {datas: 变量数据}
|
32
|
-
|
31
|
+
"""
|
33
32
|
ds = xr.open_dataset(file)
|
34
33
|
datas = []
|
35
34
|
for var in vars:
|
@@ -40,7 +39,7 @@ def get_var(file, *vars):
|
|
40
39
|
|
41
40
|
|
42
41
|
def extract5nc(file, varname):
|
43
|
-
|
42
|
+
"""
|
44
43
|
描述:
|
45
44
|
1、提取nc文件中的变量
|
46
45
|
2、将相应维度提取,建立字典
|
@@ -49,7 +48,7 @@ def extract5nc(file, varname):
|
|
49
48
|
file: 文件路径
|
50
49
|
varname: 变量名
|
51
50
|
example: data, dimdict = extract5nc(file_ecm, 'h')
|
52
|
-
|
51
|
+
"""
|
53
52
|
ds = xr.open_dataset(file)
|
54
53
|
vardata = ds[varname]
|
55
54
|
dims = vardata.dims
|
@@ -63,22 +62,22 @@ def extract5nc(file, varname):
|
|
63
62
|
def _numpy_to_nc_type(numpy_type):
|
64
63
|
"""将NumPy数据类型映射到NetCDF数据类型"""
|
65
64
|
numpy_to_nc = {
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
65
|
+
"float32": "f4",
|
66
|
+
"float64": "f8",
|
67
|
+
"int8": "i1",
|
68
|
+
"int16": "i2",
|
69
|
+
"int32": "i4",
|
70
|
+
"int64": "i8",
|
71
|
+
"uint8": "u1",
|
72
|
+
"uint16": "u2",
|
73
|
+
"uint32": "u4",
|
74
|
+
"uint64": "u8",
|
76
75
|
}
|
77
|
-
return numpy_to_nc.get(str(numpy_type),
|
76
|
+
return numpy_to_nc.get(str(numpy_type), "f4") # 默认使用 'float32'
|
78
77
|
|
79
78
|
|
80
79
|
def write2nc(file, data, varname, coords, mode):
|
81
|
-
|
80
|
+
"""
|
82
81
|
description: 写入数据到nc文件
|
83
82
|
参数:
|
84
83
|
file: 文件路径
|
@@ -87,16 +86,16 @@ def write2nc(file, data, varname, coords, mode):
|
|
87
86
|
coords: 坐标,字典,键为维度名称,值为坐标数据
|
88
87
|
mode: 写入模式,'w'为写入,'a'为追加
|
89
88
|
example: write2nc(r'test.nc', data, 'data', {'time': np.linspace(0, 120, 100), 'lev': np.linspace(0, 120, 50)}, 'a')
|
90
|
-
|
89
|
+
"""
|
91
90
|
# 判断mode是写入还是追加
|
92
|
-
if mode ==
|
91
|
+
if mode == "w":
|
93
92
|
if os.path.exists(file):
|
94
93
|
os.remove(file)
|
95
94
|
print("Warning: File already exists. Deleting it.")
|
96
|
-
elif mode ==
|
95
|
+
elif mode == "a":
|
97
96
|
if not os.path.exists(file):
|
98
97
|
print("Warning: File doesn't exist. Creating a new file.")
|
99
|
-
mode =
|
98
|
+
mode = "w"
|
100
99
|
|
101
100
|
# 打开 NetCDF 文件
|
102
101
|
with nc.Dataset(file, mode, format="NETCDF4") as ncfile:
|
@@ -146,7 +145,72 @@ def write2nc(file, data, varname, coords, mode):
|
|
146
145
|
raise ValueError("Number of dimensions does not match the data shape.")
|
147
146
|
|
148
147
|
|
149
|
-
def merge5nc(file_list, var_name, dim_name, target_filename):
|
148
|
+
def merge5nc(file_list, var_name=None, dim_name=None, target_filename=None):
|
149
|
+
"""
|
150
|
+
批量提取 nc 文件中的变量,按照某一维度合并后写入新的 nc 文件。
|
151
|
+
如果 var_name 是字符串,则认为是单变量;如果是列表,且只有一个元素,也是单变量;
|
152
|
+
如果列表元素大于1,则是多变量;如果 var_name 是 None,则合并所有变量。
|
153
|
+
|
154
|
+
参数:
|
155
|
+
file_list:nc 文件路径列表
|
156
|
+
var_name:要提取的变量名或变量名列表,默认为 None
|
157
|
+
dim_name:用于合并的维度名
|
158
|
+
target_filename:合并后的目标文件名
|
159
|
+
"""
|
160
|
+
# 初始化变量名列表
|
161
|
+
var_names = None
|
162
|
+
|
163
|
+
# 判断 var_name 是单变量、多变量还是合并所有变量
|
164
|
+
if var_name is None:
|
165
|
+
# 获取第一个文件中的所有变量名
|
166
|
+
ds = xr.open_dataset(file_list[0])
|
167
|
+
var_names = list(ds.variables.keys())
|
168
|
+
ds.close()
|
169
|
+
elif isinstance(var_name, str):
|
170
|
+
var_names = [var_name]
|
171
|
+
elif isinstance(var_name, list):
|
172
|
+
var_names = var_name
|
173
|
+
else:
|
174
|
+
raise ValueError("var_name must be a string, a list of strings, or None")
|
175
|
+
|
176
|
+
# 初始化合并数据字典
|
177
|
+
merged_data = {}
|
178
|
+
|
179
|
+
# 遍历文件列表
|
180
|
+
for i, file in enumerate(file_list):
|
181
|
+
print(f"\rReading file {i + 1}/{len(file_list)}...", end="")
|
182
|
+
ds = xr.open_dataset(file)
|
183
|
+
for var_name in var_names:
|
184
|
+
var = ds[var_name]
|
185
|
+
# 如果变量包含合并维度,则合并它们
|
186
|
+
if dim_name in var.dims:
|
187
|
+
if var_name not in merged_data:
|
188
|
+
merged_data[var_name] = [var]
|
189
|
+
else:
|
190
|
+
merged_data[var_name].append(var)
|
191
|
+
# 如果变量不包含合并维度,则仅保留第一个文件中的值
|
192
|
+
else:
|
193
|
+
if var_name not in merged_data:
|
194
|
+
merged_data[var_name] = var
|
195
|
+
ds.close()
|
196
|
+
|
197
|
+
print("\nMerging data...")
|
198
|
+
for var_name in merged_data:
|
199
|
+
if isinstance(merged_data[var_name], list):
|
200
|
+
merged_data[var_name] = xr.concat(merged_data[var_name], dim=dim_name)
|
201
|
+
|
202
|
+
merged_data = xr.Dataset(merged_data)
|
203
|
+
|
204
|
+
print("Writing data to file...")
|
205
|
+
if os.path.exists(target_filename):
|
206
|
+
print("Warning: The target file already exists.")
|
207
|
+
print("Removing existing file...")
|
208
|
+
os.remove(target_filename)
|
209
|
+
merged_data.to_netcdf(target_filename)
|
210
|
+
print(f'File "{target_filename}" has been created.')
|
211
|
+
|
212
|
+
|
213
|
+
def merge5nc_his1(file_list, var_name, dim_name, target_filename):
|
150
214
|
"""
|
151
215
|
批量提取 nc 文件中的某一变量,按照某一维度合并后写入新的 nc 文件。
|
152
216
|
|
@@ -174,7 +238,7 @@ def merge5nc(file_list, var_name, dim_name, target_filename):
|
|
174
238
|
data.to_netcdf(target_filename)
|
175
239
|
|
176
240
|
|
177
|
-
def
|
241
|
+
def merge5nc_vars_his1(file_list, var_names, dim_name, target_filename):
|
178
242
|
"""
|
179
243
|
批量提取 nc 文件中的两个变量,按照某一维度合并后写入新的 nc 文件。
|
180
244
|
|
@@ -220,7 +284,7 @@ def modify_var_value(nc_file_path, variable_name, new_value):
|
|
220
284
|
"""
|
221
285
|
try:
|
222
286
|
# Open the NetCDF file
|
223
|
-
dataset = nc.Dataset(nc_file_path,
|
287
|
+
dataset = nc.Dataset(nc_file_path, "r+")
|
224
288
|
# Get the variable to be modified
|
225
289
|
variable = dataset.variables[variable_name]
|
226
290
|
# Modify the value of the variable
|
@@ -243,7 +307,7 @@ def modify_var_attr(nc_file_path, variable_name, attribute_name, attribute_value
|
|
243
307
|
example: modify_var_attr('test.nc', 'data', 'long_name', 'This is a test variable.')
|
244
308
|
"""
|
245
309
|
try:
|
246
|
-
ds = nc.Dataset(nc_file_path,
|
310
|
+
ds = nc.Dataset(nc_file_path, "r+")
|
247
311
|
if variable_name not in ds.variables:
|
248
312
|
raise ValueError(f"Variable '{variable_name}' not found in the NetCDF file.")
|
249
313
|
|
@@ -272,7 +336,7 @@ def rename_var_or_dim(ncfile_path, old_name, new_name):
|
|
272
336
|
example: rename_var_or_dim('test.nc', 'time', 'ocean_time')
|
273
337
|
"""
|
274
338
|
try:
|
275
|
-
with nc.Dataset(ncfile_path,
|
339
|
+
with nc.Dataset(ncfile_path, "r+") as dataset:
|
276
340
|
# If the old name is not found as a variable or dimension, print a message
|
277
341
|
if old_name not in dataset.variables and old_name not in dataset.dimensions:
|
278
342
|
print(f"Variable or dimension {old_name} not found in the file.")
|
@@ -299,7 +363,7 @@ def check_ncfile(ncfile, if_delete=False):
|
|
299
363
|
return False
|
300
364
|
|
301
365
|
try:
|
302
|
-
with nc.Dataset(ncfile,
|
366
|
+
with nc.Dataset(ncfile, "r") as f:
|
303
367
|
# 确保f被使用,这里我们检查文件中变量的数量
|
304
368
|
if len(f.variables) > 0:
|
305
369
|
return True
|
@@ -322,8 +386,6 @@ def check_ncfile(ncfile, if_delete=False):
|
|
322
386
|
return False
|
323
387
|
|
324
388
|
|
325
|
-
if __name__ ==
|
326
|
-
|
389
|
+
if __name__ == "__main__":
|
327
390
|
data = np.random.rand(100, 50)
|
328
|
-
write2nc(r
|
329
|
-
'data', {'time': np.linspace(0, 120, 100), 'lev': np.linspace(0, 120, 50)}, 'a')
|
391
|
+
write2nc(r"test.nc", data, "data", {"time": np.linspace(0, 120, 100), "lev": np.linspace(0, 120, 50)}, "a")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: oafuncs
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.77
|
4
4
|
Summary: My short description for my project.
|
5
5
|
Home-page: https://github.com/Industry-Pays/OAFuncs
|
6
6
|
Author: Kun Liu
|
@@ -9,9 +9,6 @@ License: MIT
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
10
10
|
Classifier: Programming Language :: Python
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
|
-
Classifier: Programming Language :: Python :: 3.6
|
13
|
-
Classifier: Programming Language :: Python :: 3.7
|
14
|
-
Classifier: Programming Language :: Python :: 3.8
|
15
12
|
Classifier: Programming Language :: Python :: 3.9
|
16
13
|
Classifier: Programming Language :: Python :: 3.10
|
17
14
|
Classifier: Programming Language :: Python :: 3.11
|
@@ -267,6 +264,12 @@ oafuncs.oa_nc.write2nc(r'I:\test.nc', data,
|
|
267
264
|
|
268
265
|
- oa_file
|
269
266
|
|
267
|
+
- find_file
|
268
|
+
|
269
|
+
2024/12/02更新
|
270
|
+
|
271
|
+
查找满足条件的所有文件
|
272
|
+
|
270
273
|
- link_file
|
271
274
|
|
272
275
|
2024/10/28更新
|
@@ -279,9 +282,9 @@ oafuncs.oa_nc.write2nc(r'I:\test.nc', data,
|
|
279
282
|
|
280
283
|
复制文件
|
281
284
|
|
282
|
-
-
|
285
|
+
- rename_file
|
283
286
|
|
284
|
-
2024/
|
287
|
+
2024/12/02更新
|
285
288
|
|
286
289
|
按一定规则重命名文件(可多个)
|
287
290
|
|
@@ -1,25 +1,24 @@
|
|
1
1
|
oafuncs/__init__.py,sha256=2QiNjIIMtstD8y9HWlu23yiZGmmljkNUQknHEbnRwYI,673
|
2
|
-
oafuncs/oa_cmap.py,sha256=
|
2
|
+
oafuncs/oa_cmap.py,sha256=LnHI6vMCoFFkMq4P3RgItmJ01Kx5MjjwwlhnaqhRLKI,7242
|
3
3
|
oafuncs/oa_data.py,sha256=H9qZrUziOpc456iIL-1lBwSkBPApl2rlR-ajZg-mDMs,8119
|
4
4
|
oafuncs/oa_draw.py,sha256=K5B_otgx7Bu5P6ZYipNt9C-uRI1w9oxwY1M1F0-kGuM,17329
|
5
|
-
oafuncs/oa_file.py,sha256=
|
5
|
+
oafuncs/oa_file.py,sha256=iHgv0CTH4k_7YUnQ8-qQbLoz_f2lUmVhzGWQ2LkPFP8,11624
|
6
6
|
oafuncs/oa_help.py,sha256=ppNktmtNzs15R20MD1bM7yImlTQ_ngMwvoIglePOKXA,1000
|
7
|
-
oafuncs/oa_nc.py,sha256=
|
7
|
+
oafuncs/oa_nc.py,sha256=7KSONwujhElQo1dfuEE4EpVfaqnWyILdGvsdqpKrnMI,14666
|
8
8
|
oafuncs/oa_python.py,sha256=XPTP3o7zTFzfJR_YhsKfQksa3bSYwXsne9YxlJplCEA,3994
|
9
|
-
oafuncs/oa_down/User_Agent-list.txt,sha256=
|
10
|
-
oafuncs/oa_down/__init__.py,sha256=
|
9
|
+
oafuncs/oa_down/User_Agent-list.txt,sha256=j88ML0zwVibNj484ehurfZMX-PZ7G_1TwhwpcJZMIB0,884393
|
10
|
+
oafuncs/oa_down/__init__.py,sha256=nY5X7gM1jw7DJxyooR2UJSq4difkw-flz2Ucr_OuDbA,540
|
11
11
|
oafuncs/oa_down/hycom_3hourly.py,sha256=wVJgA8SBMr0RurTz7AkI8d5pJ4sj1oWl__cnSPATiCA,50343
|
12
12
|
oafuncs/oa_down/literature.py,sha256=dT3-7-beEzQ9mTP8LNV9Gf3q5Z1Pqqjc6FOS010HZeQ,17833
|
13
|
-
oafuncs/oa_down/
|
14
|
-
oafuncs/oa_down/test.py,sha256=9HTGKrQ8V2YVKsvBoVsuG7W-z8BurLGFBKUuf_LP94o,8256
|
13
|
+
oafuncs/oa_down/test.py,sha256=5sm73uduL0WO1GFv66ONIDLDAFavGz5qFoQpah5PbW8,8934
|
15
14
|
oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
|
16
15
|
oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
|
17
16
|
oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
|
18
17
|
oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
|
19
18
|
oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
|
20
19
|
oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
|
21
|
-
oafuncs-0.0.
|
22
|
-
oafuncs-0.0.
|
23
|
-
oafuncs-0.0.
|
24
|
-
oafuncs-0.0.
|
25
|
-
oafuncs-0.0.
|
20
|
+
oafuncs-0.0.77.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
|
21
|
+
oafuncs-0.0.77.dist-info/METADATA,sha256=66VNZ7aM1IFNy5H50VrBOMDlphZ0Mxr-gmBZVC6oAT4,22481
|
22
|
+
oafuncs-0.0.77.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
|
23
|
+
oafuncs-0.0.77.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
|
24
|
+
oafuncs-0.0.77.dist-info/RECORD,,
|
oafuncs/oa_down/refs_pdf.py
DELETED
@@ -1,338 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# coding=utf-8
|
3
|
-
'''
|
4
|
-
Author: Liu Kun && 16031215@qq.com
|
5
|
-
Date: 2024-11-09 13:58:28
|
6
|
-
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime: 2024-11-30 20:29:51
|
8
|
-
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\refs_pdf.py
|
9
|
-
Description:
|
10
|
-
EditPlatform: vscode
|
11
|
-
ComputerInfo: XPS 15 9510
|
12
|
-
SystemInfo: Windows 11
|
13
|
-
Python Version: 3.12
|
14
|
-
'''
|
15
|
-
|
16
|
-
import os
|
17
|
-
import random
|
18
|
-
import re
|
19
|
-
import time
|
20
|
-
from pathlib import Path
|
21
|
-
|
22
|
-
import pandas as pd
|
23
|
-
import requests
|
24
|
-
from rich import print
|
25
|
-
from rich.progress import track
|
26
|
-
|
27
|
-
__all__ = ['download5doi']
|
28
|
-
|
29
|
-
|
30
|
-
def _get_file_size(file_path, unit='KB'):
|
31
|
-
# 检查文件是否存在
|
32
|
-
if not os.path.exists(file_path):
|
33
|
-
return "文件不存在"
|
34
|
-
|
35
|
-
# 获取文件大小(字节)
|
36
|
-
file_size = os.path.getsize(file_path)
|
37
|
-
|
38
|
-
# 单位转换字典
|
39
|
-
unit_dict = {
|
40
|
-
'PB': 1024**5,
|
41
|
-
'TB': 1024**4,
|
42
|
-
'GB': 1024**3,
|
43
|
-
'MB': 1024**2,
|
44
|
-
'KB': 1024,
|
45
|
-
}
|
46
|
-
|
47
|
-
# 检查传入的单位是否合法
|
48
|
-
if unit not in unit_dict:
|
49
|
-
return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
|
50
|
-
|
51
|
-
# 转换文件大小到指定单位
|
52
|
-
converted_size = file_size / unit_dict[unit]
|
53
|
-
|
54
|
-
return converted_size
|
55
|
-
|
56
|
-
|
57
|
-
class _Downloader:
|
58
|
-
'''
|
59
|
-
根据doi下载文献pdf
|
60
|
-
'''
|
61
|
-
|
62
|
-
def __init__(self, doi, store_path):
|
63
|
-
self.url_list = [r'https://sci-hub.se',
|
64
|
-
r'https://sci-hub.ren',
|
65
|
-
r'https://sci-hub.st',
|
66
|
-
r'https://sci-hub.ru',
|
67
|
-
]
|
68
|
-
self.base_url = None
|
69
|
-
self.url = None
|
70
|
-
self.doi = doi
|
71
|
-
self.pdf_url = None
|
72
|
-
self.pdf_path = None
|
73
|
-
self.headers = {'User-Agent': self.get_ua().encode('utf-8')}
|
74
|
-
# 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2.pdf
|
75
|
-
# self.fname = doi.replace(r'/', '_') + '.pdf'
|
76
|
-
self.fname = re.sub(r'[/<>:"?*|]', '_', doi) + '.pdf'
|
77
|
-
self.store_path = Path(store_path)
|
78
|
-
self.fpath = self.store_path / self.fname
|
79
|
-
self.wrong_record_file = self.store_path / 'wrong_record.txt'
|
80
|
-
self.sleep = 5
|
81
|
-
self.cookies = None
|
82
|
-
self.check_size = 50
|
83
|
-
self.url_index = 0
|
84
|
-
self.try_times_each_url_max = 3
|
85
|
-
self.try_times = 0
|
86
|
-
|
87
|
-
def get_ua(self):
|
88
|
-
ua_list = [
|
89
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
|
90
|
-
"Opera/8.0 (Windows NT 5.1; U; en)",
|
91
|
-
"Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
|
92
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
|
93
|
-
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
|
94
|
-
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
|
95
|
-
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
96
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
97
|
-
"Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
|
98
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
99
|
-
"Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
100
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
|
101
|
-
"MAC:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
|
102
|
-
"Windows:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
103
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
104
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
105
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
106
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
|
107
|
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
|
108
|
-
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
109
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
110
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
|
111
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
|
112
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
|
113
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
|
114
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
|
115
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
|
116
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)"
|
117
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
|
118
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
|
119
|
-
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
|
120
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
|
121
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
|
122
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
|
123
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
124
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
|
125
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
|
126
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
127
|
-
"Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
128
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
|
129
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
130
|
-
"Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
131
|
-
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
132
|
-
"MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
133
|
-
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
134
|
-
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
|
135
|
-
"Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
|
136
|
-
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
|
137
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
|
138
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
|
139
|
-
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
|
140
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
|
141
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
142
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
|
143
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
|
144
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
|
145
|
-
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
146
|
-
"Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
|
147
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
|
148
|
-
"UCWEB7.0.2.37/28/999",
|
149
|
-
"NOKIA5700/UCWEB7.0.2.37/28/999",
|
150
|
-
"Openwave/UCWEB7.0.2.37/28/999",
|
151
|
-
"Openwave/UCWEB7.0.2.37/28/999",
|
152
|
-
]
|
153
|
-
ua_index = random.randint(0, len(ua_list)-1)
|
154
|
-
ua = ua_list[ua_index]
|
155
|
-
return ua
|
156
|
-
|
157
|
-
def get_pdf_url(self):
|
158
|
-
print('[bold #E6E6FA]-'*100)
|
159
|
-
print(f"DOI: {self.doi}")
|
160
|
-
print(f"Requesting: {self.url}...")
|
161
|
-
response = requests.get(self.url, headers=self.headers)
|
162
|
-
if response.status_code == 200:
|
163
|
-
self.cookies = response.cookies
|
164
|
-
text = response.text.replace('\\', '')
|
165
|
-
# text = text.replace(' ', '') # It is important to remove the space
|
166
|
-
# print(text)
|
167
|
-
pattern = re.compile(
|
168
|
-
r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
|
169
|
-
match = pattern.search(text)
|
170
|
-
if match:
|
171
|
-
got_url = match.group(1)
|
172
|
-
if r'http' not in got_url:
|
173
|
-
if got_url[:2] == '//':
|
174
|
-
self.pdf_url = 'https:' + got_url
|
175
|
-
else:
|
176
|
-
self.pdf_url = self.base_url + got_url
|
177
|
-
else:
|
178
|
-
self.pdf_url = got_url
|
179
|
-
print(f"URL: {self.pdf_url}")
|
180
|
-
else:
|
181
|
-
print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
|
182
|
-
self.try_times = self.try_times_each_url_max+1
|
183
|
-
else:
|
184
|
-
print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
|
185
|
-
print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
|
186
|
-
self.try_times = self.try_times_each_url_max+1
|
187
|
-
|
188
|
-
def url_iterate(self):
|
189
|
-
if self.url_index >= len(self.url_list):
|
190
|
-
return
|
191
|
-
url = self.url_list[self.url_index]
|
192
|
-
self.base_url = url
|
193
|
-
self.url = url + '/' + self.doi
|
194
|
-
self.get_pdf_url()
|
195
|
-
# for url in self.url_list:
|
196
|
-
# self.url = url + self.doi
|
197
|
-
# self.get_pdf_url()
|
198
|
-
# if self.pdf_url:
|
199
|
-
# break
|
200
|
-
|
201
|
-
def write_wrong_record(self):
|
202
|
-
with open(self.wrong_record_file, 'a') as f:
|
203
|
-
f.write(self.doi + '\n')
|
204
|
-
|
205
|
-
def download_pdf(self):
|
206
|
-
if self.fpath.exists():
|
207
|
-
fsize = _get_file_size(self.fpath, unit='KB')
|
208
|
-
if fsize < self.check_size:
|
209
|
-
# delete the wrong file
|
210
|
-
os.remove(self.fpath)
|
211
|
-
print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
|
212
|
-
else:
|
213
|
-
print('[bold #E6E6FA]-'*100)
|
214
|
-
print(f"[bold purple]The PDF file {self.fpath} already exists.")
|
215
|
-
return
|
216
|
-
self.url_index = 0
|
217
|
-
already_downloaded = False
|
218
|
-
self.try_times = 0
|
219
|
-
while not already_downloaded:
|
220
|
-
self.url_iterate()
|
221
|
-
if not self.pdf_url:
|
222
|
-
self.url_index += 1
|
223
|
-
if self.url_index >= len(self.url_list):
|
224
|
-
print("Failed to download the PDF file.")
|
225
|
-
self.write_wrong_record()
|
226
|
-
return
|
227
|
-
else:
|
228
|
-
self.try_times = 0
|
229
|
-
continue
|
230
|
-
else:
|
231
|
-
self.try_times += 1
|
232
|
-
if self.try_times > self.try_times_each_url_max:
|
233
|
-
self.url_index += 1
|
234
|
-
if self.url_index >= len(self.url_list):
|
235
|
-
# print("Failed to download the PDF file.")
|
236
|
-
self.write_wrong_record()
|
237
|
-
return
|
238
|
-
print(f"Downloading: {self.fname}...")
|
239
|
-
try:
|
240
|
-
response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
|
241
|
-
if response.status_code == 200:
|
242
|
-
with open(self.fpath, 'wb') as f:
|
243
|
-
f.write(response.content)
|
244
|
-
fsize = _get_file_size(self.fpath, unit='KB')
|
245
|
-
if fsize < self.check_size:
|
246
|
-
# delete the wrong file
|
247
|
-
os.remove(self.fpath)
|
248
|
-
print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
|
249
|
-
else:
|
250
|
-
print(f"[bold green]Sucessful to download {self.fpath}")
|
251
|
-
already_downloaded = True
|
252
|
-
else:
|
253
|
-
self.try_times = self.try_times_each_url_max+1
|
254
|
-
print(f"Failed to download the PDF file. Status code: {response.status_code}")
|
255
|
-
print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
|
256
|
-
except Exception as e:
|
257
|
-
print(f"Failed to download the PDF file. Error: {e}")
|
258
|
-
time.sleep(self.sleep)
|
259
|
-
if self.try_times >= self.try_times_each_url_max:
|
260
|
-
self.url_index += 1
|
261
|
-
if self.url_index >= len(self.url_list):
|
262
|
-
print("\n[bold #CD5C5C]Failed to download the PDF file.")
|
263
|
-
self.write_wrong_record()
|
264
|
-
return
|
265
|
-
if self.try_times == self.try_times_each_url_max:
|
266
|
-
print(f'Tried {self.try_times} times for {self.url_list[self.url_index-1]}.')
|
267
|
-
print("Try another URL...")
|
268
|
-
|
269
|
-
|
270
|
-
def read_excel(file, col_name=r'DOI'):
|
271
|
-
df = pd.read_excel(file)
|
272
|
-
df_list = df[col_name].tolist()
|
273
|
-
# 去掉nan
|
274
|
-
df_list = [doi for doi in df_list if str(doi) != 'nan']
|
275
|
-
return df_list
|
276
|
-
|
277
|
-
|
278
|
-
def read_txt(file):
|
279
|
-
with open(file, 'r') as f:
|
280
|
-
lines = f.readlines()
|
281
|
-
# 去掉换行符以及空行
|
282
|
-
lines = [line.strip() for line in lines if line.strip()]
|
283
|
-
return lines
|
284
|
-
|
285
|
-
|
286
|
-
def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r'DOI'):
|
287
|
-
'''
|
288
|
-
Description: Download PDF files by DOI.
|
289
|
-
|
290
|
-
Args:
|
291
|
-
store_path: str, The path to store the PDF files.
|
292
|
-
doi_list: list or str, The list of DOIs.
|
293
|
-
txt_file: str, The path of the txt file that contains the DOIs.
|
294
|
-
excel_file: str, The path of the excel file that contains the DOIs.
|
295
|
-
col_name: str, The column name of the DOIs in the excel file. Default is 'DOI'.
|
296
|
-
|
297
|
-
Returns:
|
298
|
-
None
|
299
|
-
|
300
|
-
Example:
|
301
|
-
download5doi(doi_list='10.3389/feart.2021.698876')
|
302
|
-
download5doi(store_path=r'I:\Delete\ref_pdf', doi_list='10.3389/feart.2021.698876')
|
303
|
-
download5doi(store_path=r'I:\Delete\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
|
304
|
-
download5doi(store_path=r'I:\Delete\ref_pdf', txt_file=r'I:\Delete\ref_pdf\wrong_record.txt')
|
305
|
-
download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
|
306
|
-
download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
|
307
|
-
'''
|
308
|
-
print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
|
309
|
-
print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
|
310
|
-
print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
|
311
|
-
print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
|
312
|
-
print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
|
313
|
-
print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
|
314
|
-
|
315
|
-
if not store_path:
|
316
|
-
store_path = Path.cwd()
|
317
|
-
else:
|
318
|
-
store_path = Path(str(store_path))
|
319
|
-
store_path.mkdir(parents=True, exist_ok=True)
|
320
|
-
store_path = str(store_path)
|
321
|
-
|
322
|
-
# 如果doi_list是str,转换为list
|
323
|
-
if isinstance(doi_list, str) and doi_list:
|
324
|
-
doi_list = [doi_list]
|
325
|
-
if txt_file:
|
326
|
-
doi_list = read_txt(txt_file)
|
327
|
-
if excel_file:
|
328
|
-
doi_list = read_excel(excel_file, col_name)
|
329
|
-
print(f"Downloading {len(doi_list)} PDF files...")
|
330
|
-
for doi in track(doi_list, description='Downloading...'):
|
331
|
-
download = _Downloader(doi, store_path)
|
332
|
-
download.download_pdf()
|
333
|
-
|
334
|
-
|
335
|
-
if __name__ == '__main__':
|
336
|
-
store_path = r'I:\Delete\ref_pdf'
|
337
|
-
# download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
|
338
|
-
download5doi(store_path, excel_file=r'I:\Delete\ref_pdf\savedrecs.xls')
|
File without changes
|
File without changes
|
File without changes
|