oafuncs 0.0.60__py2.py3-none-any.whl → 0.0.62__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,332 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ '''
4
+ Author: Liu Kun && 16031215@qq.com
5
+ Date: 2024-11-28 10:42:56
6
+ LastEditors: Liu Kun && 16031215@qq.com
7
+ LastEditTime: 2024-11-28 10:43:18
8
+ FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\literature.py
9
+ Description:
10
+ EditPlatform: vscode
11
+ ComputerInfo: XPS 15 9510
12
+ SystemInfo: Windows 11
13
+ Python Version: 3.12
14
+ '''
15
+
16
+
17
+ import os
18
+ import random
19
+ import re
20
+ import time
21
+ from pathlib import Path
22
+
23
+ import pandas as pd
24
+ import requests
25
+ from rich import print
26
+ from rich.progress import track
27
+
28
+ __all__ = ['download5doi']
29
+
30
+
31
def _get_file_size(file_path, unit='KB'):
    '''
    Return the size of a file expressed in the requested unit.

    Args:
        file_path: Path of the file to measure.
        unit: One of 'PB', 'TB', 'GB', 'MB', 'KB' (1024-based). Default is 'KB'.

    Returns:
        The size as a float, or a message string when the file does not
        exist or the unit is not supported.
    '''
    # A missing file is reported with a message instead of an exception.
    if not os.path.exists(file_path):
        return "文件不存在"

    # Size on disk, in bytes.
    byte_count = os.path.getsize(file_path)

    # Divisor for every supported unit: KB = 1024**1 ... PB = 1024**5.
    divisors = {
        name: 1024 ** power
        for power, name in enumerate(('KB', 'MB', 'GB', 'TB', 'PB'), start=1)
    }

    # An unknown unit is reported with a message as well.
    divisor = divisors.get(unit)
    if divisor is None:
        return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"

    return byte_count / divisor
56
+
57
+
58
class _Downloader:
    '''
    Download the PDF of one paper, identified by its DOI.

    The mirrors in ``url_list`` are tried in order. For each mirror the
    landing page of the DOI is requested and the direct PDF link is
    extracted from it; the PDF is then fetched with a limited number of
    attempts. A DOI that cannot be downloaded from any mirror is appended
    to ``wrong_record.txt`` inside the storage directory.

    Args:
        doi: The DOI of the paper.
        store_path: Directory in which the PDF (and the failure log) is stored.
    '''

    # Pool of User-Agent strings; one entry is picked at random per instance.
    _UA_LIST = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
        "Opera/8.0 (Windows NT 5.1; U; en)",
        "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
        "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
        "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
        "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
        "MAC:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
        "Windows:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
        # The comma after the next entry was missing, which fused it with the
        # following string into a single invalid User-Agent.
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
        "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
        "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
        "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
        "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
        "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
        "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
        "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
        "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
        "UCWEB7.0.2.37/28/999",
        "NOKIA5700/UCWEB7.0.2.37/28/999",
        "Openwave/UCWEB7.0.2.37/28/999",
        "Openwave/UCWEB7.0.2.37/28/999",
    )

    # Matches the "save" button of the landing page and captures the PDF link.
    _PDF_LINK_PATTERN = re.compile(
        r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')

    def __init__(self, doi, store_path):
        # Mirrors, tried in this order.
        self.url_list = [r'https://sci-hub.se',
                         r'https://sci-hub.ren',
                         r'https://sci-hub.st',
                         r'https://sci-hub.ru',
                         ]
        self.base_url = None  # mirror currently in use
        self.url = None  # landing page of the DOI on the current mirror
        self.doi = doi
        self.pdf_url = None  # direct PDF link found on the current mirror
        self.pdf_path = None
        self.headers = {'User-Agent': self.get_ua().encode('utf-8')}
        # A DOI may contain characters that are illegal in file names, e.g.
        # 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2 -- replace them.
        self.fname = re.sub(r'[/<>:"?*|]', '_', doi) + '.pdf'
        self.store_path = Path(store_path)
        self.fpath = self.store_path / self.fname
        self.wrong_record_file = self.store_path / 'wrong_record.txt'
        self.sleep = 5  # seconds to wait between two download attempts
        self.timeout = 30  # seconds before a stalled HTTP request is aborted
        self.cookies = None
        self.check_size = 50  # KB; anything smaller is treated as a failed download
        self.url_index = 0
        self.try_times_each_url_max = 3
        self.try_times = 0

    def get_ua(self):
        '''Return a randomly chosen User-Agent string.'''
        return random.choice(self._UA_LIST)

    def _resolve_pdf_url(self, html):
        '''
        Extract the absolute PDF link from the landing page ``html``.

        Returns the link as a string, or None when the page has no link.
        '''
        # The page escapes slashes with backslashes; drop them before matching.
        match = self._PDF_LINK_PATTERN.search(html.replace('\\', ''))
        if not match:
            return None
        got_url = match.group(1)
        if r'http' in got_url:
            return got_url
        if got_url.startswith('//'):
            # Protocol-relative link.
            return 'https:' + got_url
        # Link relative to the mirror root.
        return self.base_url + got_url

    def get_pdf_url(self):
        '''
        Request the landing page ``self.url`` and store the PDF link in
        ``self.pdf_url`` (None when the mirror does not provide the file).
        '''
        print('[bold #E6E6FA]-'*100)
        print(f"DOI: {self.doi}")
        print(f"Requesting: {self.url}...")
        # Never reuse a link that was found on a previously tried mirror.
        self.pdf_url = None
        try:
            response = requests.get(self.url, headers=self.headers, timeout=self.timeout)
        except requests.RequestException as e:
            # An unreachable mirror must not abort the whole batch.
            print(f"Failed to retrieve the webpage. Error: {e}")
            self.try_times = self.try_times_each_url_max+1
            return
        if response.status_code != 200:
            print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
            print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
            self.try_times = self.try_times_each_url_max+1
            return
        self.cookies = response.cookies
        self.pdf_url = self._resolve_pdf_url(response.text)
        if self.pdf_url:
            print(f"URL: {self.pdf_url}")
        else:
            print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
            # Mark the mirror as exhausted.
            self.try_times = self.try_times_each_url_max+1

    def url_iterate(self):
        '''Point the downloader at mirror ``self.url_index`` and look up the PDF link.'''
        if self.url_index >= len(self.url_list):
            return
        url = self.url_list[self.url_index]
        self.base_url = url
        self.url = url + '/' + self.doi
        self.get_pdf_url()

    def write_wrong_record(self):
        '''Append the DOI to the log of failed downloads.'''
        with open(self.wrong_record_file, 'a', encoding='utf-8') as f:
            f.write(self.doi + '\n')

    def _download_from_current_mirror(self):
        '''
        Fetch ``self.pdf_url`` with at most ``try_times_each_url_max`` attempts.

        Returns True as soon as a sufficiently large file has been saved and
        False when the mirror has to be given up.
        '''
        mirror = self.url_list[self.url_index]
        while self.try_times < self.try_times_each_url_max:
            self.try_times += 1
            print(f"Downloading: {self.fname}...")
            try:
                response = requests.get(self.pdf_url, headers=self.headers,
                                        cookies=self.cookies, timeout=self.timeout)
                if response.status_code != 200:
                    # Retrying the same link is pointless; go to the next mirror.
                    print(f"Failed to download the PDF file. Status code: {response.status_code}")
                    print(f'[bold #AFEEEE]The website {mirror} do not inlcude the PDF file.')
                    return False
                with open(self.fpath, 'wb') as f:
                    f.write(response.content)
                fsize = _get_file_size(self.fpath, unit='KB')
                if fsize >= self.check_size:
                    print(f"[bold green]Sucessful to download {self.fpath}")
                    return True
                # A tiny file is an error page rather than a PDF: delete it and retry.
                os.remove(self.fpath)
                print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
            except (requests.RequestException, OSError) as e:
                print(f"Failed to download the PDF file. Error: {e}")
            # Be polite to the server before the next attempt.
            time.sleep(self.sleep)
        print(f'Tried {self.try_times} times for {mirror}.')
        return False

    def download_pdf(self):
        '''
        Download the PDF into ``self.fpath``, trying every mirror in turn.

        An existing file of plausible size is kept as is. When every mirror
        fails, the DOI is recorded through ``write_wrong_record``.
        '''
        if self.fpath.exists():
            fsize = _get_file_size(self.fpath, unit='KB')
            if fsize >= self.check_size:
                print('[bold #E6E6FA]-'*100)
                print(f"[bold purple]The PDF file {self.fpath} already exists.")
                return
            # Leftover of an earlier failed download: delete it and start over.
            os.remove(self.fpath)
            print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")

        self.url_index = 0
        while self.url_index < len(self.url_list):
            self.try_times = 0
            self.url_iterate()
            # Return right after a success so that it is never followed by the
            # failure bookkeeping below.
            if self.pdf_url and self._download_from_current_mirror():
                return
            self.url_index += 1
            if self.url_index < len(self.url_list):
                print("Try another URL...")

        print("\n[bold #CD5C5C]Failed to download the PDF file.")
        self.write_wrong_record()
269
+
270
+
271
def read_excel(file, col_name=r'DOI'):
    '''
    Read the DOIs stored in one column of an Excel file.

    Args:
        file: Path of the Excel file.
        col_name: Name of the column that holds the DOIs. Default is 'DOI'.

    Returns:
        A list with the values of the column, without the entries whose
        string form is 'nan' (empty cells).
    '''
    table = pd.read_excel(file)
    column_values = table[col_name].tolist()
    # Drop empty cells, which show up as nan.
    return list(filter(lambda value: str(value) != 'nan', column_values))
277
+
278
+
279
def read_txt(file):
    '''
    Read a text file that lists one DOI per line.

    Args:
        file: Path of the text file.

    Returns:
        A list of the lines, stripped of surrounding whitespace, with
        blank lines left out.
    '''
    entries = []
    with open(file, 'r') as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            # Skip empty and whitespace-only lines.
            if cleaned:
                entries.append(cleaned)
    return entries
285
+
286
+
287
def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r'DOI'):
    r'''
    Description: Download PDF files by DOI.

    The DOIs are taken from doi_list, txt_file or excel_file. When several
    sources are given, excel_file takes precedence over txt_file, which
    takes precedence over doi_list. When no DOI is available, a note is
    printed and nothing is downloaded.

    Args:
        store_path: str, The path to store the PDF files. It is created when it does not exist. Default is the current working directory.
        doi_list: list or str, The list of DOIs.
        txt_file: str, The path of the txt file that contains the DOIs.
        excel_file: str, The path of the excel file that contains the DOIs.
        col_name: str, The column name of the DOIs in the excel file. Default is 'DOI'.

    Returns:
        None

    Example:
        download5doi(doi_list='10.3389/feart.2021.698876')
        download5doi(store_path=r'I:\Delete\ref_pdf', doi_list='10.3389/feart.2021.698876')
        download5doi(store_path=r'I:\Delete\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
        download5doi(store_path=r'I:\Delete\ref_pdf', txt_file=r'I:\Delete\ref_pdf\wrong_record.txt')
        download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
        download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
    '''
    # NOTE: the docstring is a raw string because the example paths contain
    # backslashes (\D, \r) that would otherwise be read as escape sequences.
    if not store_path:
        store_path = Path.cwd()
    else:
        store_path = Path(str(store_path))
        store_path.mkdir(parents=True, exist_ok=True)
    store_path = str(store_path)

    # A single DOI given as a string is turned into a one-element list.
    if isinstance(doi_list, str) and doi_list:
        doi_list = [doi_list]
    if txt_file:
        doi_list = read_txt(txt_file)
    if excel_file:
        doi_list = read_excel(excel_file, col_name)

    # Without this guard len(None) below raises a TypeError.
    if not doi_list:
        print('[bold yellow]No DOI was provided; nothing to download.')
        return

    print(f"Downloading {len(doi_list)} PDF files...")
    for doi in track(doi_list, description='Downloading...'):
        download = _Downloader(doi, store_path)
        download.download_pdf()
327
+
328
+
329
if __name__ == '__main__':
    # Manual run: download every DOI listed in an exported records file.
    # The paths below are specific to the author's machine.
    store_path = r'I:\Delete\ref_pdf'
    # Single-DOI variant:
    # download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
    records_file = r'I:\Delete\ref_pdf\savedrecs.xls'
    download5doi(store_path, excel_file=records_file)
@@ -4,8 +4,8 @@
4
4
  Author: Liu Kun && 16031215@qq.com
5
5
  Date: 2024-11-09 13:58:28
6
6
  LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2024-11-11 16:12:20
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\OAFuncs\\oa_down\\refs_pdf.py
7
+ LastEditTime: 2024-11-21 13:18:18
8
+ FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\refs_pdf.py
9
9
  Description:
10
10
  EditPlatform: vscode
11
11
  ComputerInfo: XPS 15 9510
@@ -41,7 +41,7 @@ def _get_file_size(file_path, unit='KB'):
41
41
  'TB': 1024**4,
42
42
  'GB': 1024**3,
43
43
  'MB': 1024**2,
44
- 'KB': 1024
44
+ 'KB': 1024,
45
45
  }
46
46
 
47
47
  # 检查传入的单位是否合法
@@ -178,14 +178,11 @@ class _Downloader:
178
178
  self.pdf_url = got_url
179
179
  print(f"URL: {self.pdf_url}")
180
180
  else:
181
- print(f'[bold #AFEEEE]The website {
182
- self.url_list[self.url_index]} do not inlcude the PDF file.')
181
+ print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
183
182
  self.try_times = self.try_times_each_url_max+1
184
183
  else:
185
- print(f"Failed to retrieve the webpage. Status code: {
186
- response.status_code}")
187
- print(f'[bold #AFEEEE]The website {
188
- self.url_list[self.url_index]} do not inlcude the PDF file.')
184
+ print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
185
+ print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
189
186
  self.try_times = self.try_times_each_url_max+1
190
187
 
191
188
  def url_iterate(self):
@@ -211,12 +208,10 @@ class _Downloader:
211
208
  if fsize < self.check_size:
212
209
  # delete the wrong file
213
210
  os.remove(self.fpath)
214
- print(f"[bold yellow]The PDF file {
215
- self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
211
+ print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
216
212
  else:
217
213
  print('[bold #E6E6FA]-'*100)
218
- print(f"[bold purple]The PDF file {
219
- self.fpath} already exists.")
214
+ print(f"[bold purple]The PDF file {self.fpath} already exists.")
220
215
  return
221
216
  self.url_index = 0
222
217
  already_downloaded = False
@@ -242,8 +237,7 @@ class _Downloader:
242
237
  return
243
238
  print(f"Downloading: {self.fname}...")
244
239
  try:
245
- response = requests.get(
246
- self.pdf_url, headers=self.headers, cookies=self.cookies)
240
+ response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
247
241
  if response.status_code == 200:
248
242
  with open(self.fpath, 'wb') as f:
249
243
  f.write(response.content)
@@ -251,18 +245,14 @@ class _Downloader:
251
245
  if fsize < self.check_size:
252
246
  # delete the wrong file
253
247
  os.remove(self.fpath)
254
- print(f"[bold yellow]The PDF file {
255
- self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
248
+ print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
256
249
  else:
257
- print(f"[bold green]Sucessful to download {
258
- self.fpath}")
250
+ print(f"[bold green]Sucessful to download {self.fpath}")
259
251
  already_downloaded = True
260
252
  else:
261
253
  self.try_times = self.try_times_each_url_max+1
262
- print(f"Failed to download the PDF file. Status code: {
263
- response.status_code}")
264
- print(f'[bold #AFEEEE]The website {
265
- self.url_list[self.url_index]} do not inlcude the PDF file.')
254
+ print(f"Failed to download the PDF file. Status code: {response.status_code}")
255
+ print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
266
256
  except Exception as e:
267
257
  print(f"Failed to download the PDF file. Error: {e}")
268
258
  time.sleep(self.sleep)
@@ -273,8 +263,7 @@ class _Downloader:
273
263
  self.write_wrong_record()
274
264
  return
275
265
  if self.try_times == self.try_times_each_url_max:
276
- print(f'Tried {self.try_times} times for {
277
- self.url_list[self.url_index-1]}.')
266
+ print(f'Tried {self.try_times} times for {self.url_list[self.url_index-1]}.')
278
267
  print("Try another URL...")
279
268
 
280
269
 
@@ -316,6 +305,8 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
316
305
  download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
317
306
  download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
318
307
  '''
308
+ print('[bold #EE33fA]Note:\n 函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
309
+
319
310
  if not store_path:
320
311
  store_path = Path.cwd()
321
312
  else:
oafuncs/oa_draw.py CHANGED
@@ -4,8 +4,8 @@
4
4
  Author: Liu Kun && 16031215@qq.com
5
5
  Date: 2024-09-17 17:26:11
6
6
  LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2024-10-20 21:12:58
8
- FilePath: \\Python\\My_Funcs\\OAFuncs\\OAFuncs\\oa_draw.py
7
+ LastEditTime: 2024-11-21 13:10:47
8
+ FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_draw.py
9
9
  Description:
10
10
  EditPlatform: vscode
11
11
  ComputerInfo: XPS 15 9510
@@ -26,13 +26,13 @@ import xarray as xr
26
26
  from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter
27
27
  from mpl_toolkits.axes_grid1 import make_axes_locatable
28
28
 
29
- __all__ = ['create_gif', 'xy2lonlat', 'plot_contourf',
30
- 'plot_contourf_lonlat', 'plot_quiver', 'plot_contourf_cartopy']
29
+ __all__ = ['create_gif', 'xy2lonlat', 'plot_contourf', 'plot_contourf_lonlat', 'plot_quiver', 'plot_contourf_cartopy']
31
30
 
32
31
  warnings.filterwarnings('ignore')
33
32
 
34
-
35
33
  # ** 将生成图片/已有图片制作成动图
34
+
35
+
36
36
  def create_gif(image_list: list, gif_name: str, duration=0.2): # 制作动图,默认间隔0.2
37
37
  '''
38
38
  func : 制作动图,将已有图片拼接
@@ -69,8 +69,7 @@ def xy2lonlat(xy, lonlat='lon', decimal=2):
69
69
  # degrees = int(abs(x))
70
70
  degrees = round(abs(x), decimal)
71
71
  direction = "E" if x >= 0 else "W"
72
- out_list.append(
73
- f"{degrees:.{decimal}f}°{direction}" if x != 0 and x != 180 else f"{degrees}°")
72
+ out_list.append(f"{degrees:.{decimal}f}°{direction}" if x != 0 and x != 180 else f"{degrees}°")
74
73
  return out_list if len(out_list) > 1 else out_list[0]
75
74
 
76
75
  def format_latitude(y_list):
@@ -81,8 +80,7 @@ def xy2lonlat(xy, lonlat='lon', decimal=2):
81
80
  # degrees = int(abs(y))
82
81
  degrees = round(abs(y), decimal)
83
82
  direction = "N" if y >= 0 else "S"
84
- out_list.append(
85
- f"{degrees:.{decimal}f}°{direction}" if y != 0 else f"{degrees}°")
83
+ out_list.append(f"{degrees:.{decimal}f}°{direction}" if y != 0 else f"{degrees}°")
86
84
  return out_list if len(out_list) > 1 else out_list[0]
87
85
 
88
86
  if lonlat == 'lon':
@@ -107,12 +105,6 @@ class _MyFormatter(mpl.ticker.ScalarFormatter):
107
105
 
108
106
  def __call__(self, x, pos):
109
107
  if ((abs(x) < 1e-2) or (abs(x) > 1e4)) and x != 0:
110
- # if self.magnitude_max - self.magnitude_min == 1 and (int(math.modf(math.log10(abs(x)))[1]) == self.magnitude_min):
111
- # a, b = '{:.1e}'.format(x).split('e')
112
- # b = int(b)
113
- # return '${}{:.2f} \\times 10^{{{}}}$'.format(' ' if self.p_n and x > 0 else '', float(a)/10, b+1)
114
- # else:
115
- # return '${}{} \\times 10^{{{}}}$'.format(' ' if self.p_n and x > 0 else '', *'{:.2e}'.format(x).split('e'))
116
108
  if self.magnitude_max - self.magnitude_min == 1 and (int(math.modf(math.log10(abs(x)))[1]) == self.magnitude_min):
117
109
  a, b = '{:.1e}'.format(x).split('e')
118
110
  a = float(a) / 10
@@ -152,20 +144,16 @@ def plot_contourf(pic_data, picname=None, c_map='rainbow', minmax=None, labels=N
152
144
  flag = (value_min < 0) and (value_max > 0)
153
145
  norm = mpl.colors.TwoSlopeNorm(
154
146
  vmin=-1 * v_bry, vcenter=0, vmax=v_bry) if flag else mpl.colors.Normalize(vmin=value_min, vmax=value_max)
155
- cticks = [num for num in np.linspace(-1 * v_bry if flag else value_min,
156
- v_bry if flag else value_max, 9)] if value_min != value_max else None
157
- levels = np.linspace(-1 * v_bry, v_bry,
158
- 20) if flag else None if value_min == value_max else np.linspace(value_min, value_max, 20)
147
+ cticks = [num for num in np.linspace(-1 * v_bry if flag else value_min, v_bry if flag else value_max, 9)] if value_min != value_max else None
148
+ levels = np.linspace(-1 * v_bry, v_bry, 20) if flag else None if value_min == value_max else np.linspace(value_min, value_max, 20)
159
149
 
160
150
  shape = np.array(pic_data).shape
161
151
  x, y = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]))
162
152
 
163
153
  fig, ax = plt.subplots(figsize=figsize)
164
154
  flag_lc = levels is not None and cticks is not None
165
- CS = ax.contourf(x, y, pic_data, cmap=cmap, norm=norm, levels=levels, extend='both') if flag_lc else ax.contourf(
166
- x, y, pic_data, cmap=cmap, norm=norm, extend='both')
167
- cb = fig.colorbar(CS, ax=ax, orientation='vertical', shrink=1, format='%.3g', spacing='uniform', ticks=cticks) if cticks is not None else fig.colorbar(
168
- CS, ax=ax, orientation='vertical', shrink=1, format='%.3g', spacing='uniform')
155
+ CS = ax.contourf(x, y, pic_data, cmap=cmap, norm=norm, levels=levels, extend='both') if flag_lc else ax.contourf(x, y, pic_data, cmap=cmap, norm=norm, extend='both')
156
+ cb = fig.colorbar(CS, ax=ax, orientation='vertical', shrink=1, format='%.3g', spacing='uniform', ticks=cticks) if cticks is not None else fig.colorbar(CS, ax=ax, orientation='vertical', shrink=1, format='%.3g', spacing='uniform')
169
157
  """%.3g采用的是自动调整格式,也可设置为%.3f,则改为3位小数"""
170
158
 
171
159
  # 将格式化器设置为自定义的函数
@@ -232,10 +220,8 @@ def plot_contourf_lonlat(data, lon, lat, interval=5, picname=None, c_map='rainbo
232
220
  plt.contourf(data, cmap=c_map)
233
221
  x_space = int(len(lon) * interval / (lon[-1] - lon[0]))
234
222
  y_space = int(len(lat) * interval / (lat[-1] - lat[0]))
235
- plt.xticks(np.arange(0, len(lon), x_space), [
236
- format_longitude(lon[i]) for i in range(0, len(lon), x_space)])
237
- plt.yticks(np.arange(0, len(lat), y_space), [
238
- format_latitude(lat[i]) for i in range(0, len(lat), y_space)])
223
+ plt.xticks(np.arange(0, len(lon), x_space), [format_longitude(lon[i]) for i in range(0, len(lon), x_space)])
224
+ plt.yticks(np.arange(0, len(lat), y_space), [format_latitude(lat[i]) for i in range(0, len(lat), y_space)])
239
225
  plt.colorbar()
240
226
  plt.savefig(
241
227
  picname, bbox_inches='tight') if picname is not None else plt.show()
@@ -288,14 +274,12 @@ def plot_quiver(u, v, lon, lat, picname=None, cmap='coolwarm', scale=0.25, width
288
274
  cmap=cmap, # 矢量的颜色,多色
289
275
  width=width)
290
276
  # plt.quiverkey(quiver_plot, X=0.90, Y=0.975, U=1, label='1 m/s', labelpos='E', fontproperties={'size': 10})
291
- plt.quiverkey(quiver_plot, X=0.87, Y=0.975, U=mean_S,
292
- label=f'{mean_S:.2f} m/s', labelpos='E', fontproperties={'size': 10})
277
+ plt.quiverkey(quiver_plot, X=0.87, Y=0.975, U=mean_S, label=f'{mean_S:.2f} m/s', labelpos='E', fontproperties={'size': 10})
293
278
  plt.colorbar(quiver_plot)
294
279
  plt.xlabel('X')
295
280
  plt.ylabel('Y')
296
281
 
297
- plt.savefig(
298
- picname, bbox_inches='tight') if picname is not None else plt.show()
282
+ plt.savefig(picname, bbox_inches='tight') if picname is not None else plt.show()
299
283
  plt.clf()
300
284
  plt.close()
301
285
 
@@ -353,8 +337,7 @@ def plot_contourf_cartopy(data, lon, lat, picname=None, cmap='rainbow', cn_fill_
353
337
  cticks = cbar_ticks
354
338
  norm = mpl.colors.BoundaryNorm(cticks, cmap.N)
355
339
 
356
- cnplot = ax.contourf(X, Y, data, levels=levels, cmap=cmap,
357
- norm=norm, transform=proj, extend='both', alpha=1, zorder=0)
340
+ cnplot = ax.contourf(X, Y, data, levels=levels, cmap=cmap, norm=norm, transform=proj, extend='both', alpha=1, zorder=0)
358
341
  # cllevels = np.linspace(data_min, data_max, 9)
359
342
  # clplot = ax.contour(X, Y, data, levels=levels[9::10], colors='k', linewidths=0.5, transform=proj, zorder=1, alpha=0.8, linestyle='--')
360
343
  # 添加色标,并选择位置
@@ -376,15 +359,13 @@ def plot_contourf_cartopy(data, lon, lat, picname=None, cmap='rainbow', cn_fill_
376
359
  cax = divider.new_horizontal(size="5%", pad=0.1, axes_class=plt.Axes)
377
360
  fig.add_axes(cax)
378
361
  # cbar = plt.colorbar(cnplot, cax=cax, orientation='vertical', extend='both', format='%.0f')
379
- cbar = fig.colorbar(mpl.cm.ScalarMappable(cmap=cmap, norm=norm),
380
- cax=cax, orientation='vertical', extend='both', format='%.3f')
362
+ cbar = fig.colorbar(mpl.cm.ScalarMappable(cmap=cmap, norm=norm), cax=cax, orientation='vertical', extend='both', format='%.3f')
381
363
  cax.yaxis.set_ticks_position('right')
382
364
  cax.yaxis.set_label_position('right')
383
365
  else: # 上方
384
366
  cax = divider.new_vertical(size="5%", pad=0.2, axes_class=plt.Axes)
385
367
  fig.add_axes(cax)
386
- cbar = plt.colorbar(
387
- cnplot, cax=cax, orientation='horizontal', extend='both')
368
+ cbar = plt.colorbar(cnplot, cax=cax, orientation='horizontal', extend='both')
388
369
  cbar.ax.tick_params(labelsize=10)
389
370
  cbar.ax.xaxis.set_tick_params(direction='in', width=1, length=2)
390
371
  # 添加cbar_ticks
@@ -396,11 +377,9 @@ def plot_contourf_cartopy(data, lon, lat, picname=None, cmap='rainbow', cn_fill_
396
377
  # cbar.set_ticks(np.arange(round(levels[0]), round(levels[-1]), round((levels[-1]-levels[0])/9))) # 设置色标刻度
397
378
 
398
379
  # 单独设置label
399
- cbar.set_label(title, fontsize=10,
400
- weight='bold')
380
+ cbar.set_label(title, fontsize=10, weight='bold')
401
381
  # cax.set_position([0.1, 0.2, 0.02, 0.6]) # 调整色标位置
402
- fig.savefig(
403
- picname, bbox_inches='tight', dpi=600) if picname is not None else plt.show()
382
+ fig.savefig(picname, bbox_inches='tight', dpi=600) if picname is not None else plt.show()
404
383
  plt.close()
405
384
 
406
385
 
@@ -408,23 +387,20 @@ if __name__ == '__main__':
408
387
  # ** 绘制填色图
409
388
  data = np.random.randn(100, 100)
410
389
  picname = 'test.png'
411
- plot_contourf(data, picname, c_map='rainbow', minmax=None,
412
- labels=None, ticks_space=None, ticks=None, figsize=(12, 9))
390
+ plot_contourf(data, picname, c_map='rainbow', minmax=None, labels=None, ticks_space=None, ticks=None, figsize=(12, 9))
413
391
  # ** 绘制矢量场
414
392
  u = np.random.randn(100, 100)
415
393
  v = np.random.randn(100, 100)
416
394
  lon = np.linspace(0, 360, 100)
417
395
  lat = np.linspace(-90, 90, 100)
418
396
  picname = 'test.png'
419
- plot_quiver(u, v, lon, lat, picname, cmap='coolwarm',
420
- scale=0.25, width=0.002, x_space=5, y_space=5)
397
+ plot_quiver(u, v, lon, lat, picname, cmap='coolwarm', scale=0.25, width=0.002, x_space=5, y_space=5)
421
398
  # ** 绘制经纬度填色图
422
399
  data = np.random.randn(100, 100)
423
400
  lon = np.linspace(0, 360, 100)
424
401
  lat = np.linspace(-90, 90, 100)
425
402
  picname = 'test.png'
426
- plot_contourf_lonlat(data, lon, lat, interval=5,
427
- picname=picname, c_map='rainbow')
403
+ plot_contourf_lonlat(data, lon, lat, interval=5, picname=picname, c_map='rainbow')
428
404
  # ** 制作动图
429
405
  image_list = ['test1.png', 'test2.png', 'test3.png']
430
406
  gif_name = 'test.gif'