oafuncs 0.0.61__py2.py3-none-any.whl → 0.0.63__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,4 +17,5 @@ Python Version: 3.11
17
17
  # from .meteorological_home import sign as meteorological_home
18
18
 
19
19
  from .hycom_3hourly import *
20
- from .refs_pdf import *
20
+ from .literature import *
21
+ from .refs_pdf import * # 在2024/12/31之后删除此脚本
@@ -4,7 +4,7 @@
4
4
  Author: Liu Kun && 16031215@qq.com
5
5
  Date: 2024-11-01 10:31:09
6
6
  LastEditors: Liu Kun && 16031215@qq.com
7
- LastEditTime: 2024-11-21 13:24:49
7
+ LastEditTime: 2024-11-28 16:04:50
8
8
  FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\hycom_3hourly.py
9
9
  Description:
10
10
  EditPlatform: vscode
@@ -69,12 +69,12 @@ variable_info = {
69
69
  'u': {'var_name': 'water_u', 'standard_name': 'eastward_sea_water_velocity'},
70
70
  'v': {'var_name': 'water_v', 'standard_name': 'northward_sea_water_velocity'},
71
71
  'temp': {'var_name': 'water_temp', 'standard_name': 'sea_water_potential_temperature'},
72
- 'salinity': {'var_name': 'salinity', 'standard_name': 'sea_water_salinity'},
72
+ 'salt': {'var_name': 'salinity', 'standard_name': 'sea_water_salinity'},
73
73
  'ssh': {'var_name': 'surf_el', 'standard_name': 'sea_surface_elevation'},
74
74
  'u_b': {'var_name': 'water_u_bottom', 'standard_name': 'eastward_sea_water_velocity_at_sea_floor'},
75
75
  'v_b': {'var_name': 'water_v_bottom', 'standard_name': 'northward_sea_water_velocity_at_sea_floor'},
76
76
  'temp_b': {'var_name': 'water_temp_bottom', 'standard_name': 'sea_water_potential_temperature_at_sea_floor'},
77
- 'salinity_b': {'var_name': 'salinity_bottom', 'standard_name': 'sea_water_salinity_at_sea_floor'},
77
+ 'salt_b': {'var_name': 'salinity_bottom', 'standard_name': 'sea_water_salinity_at_sea_floor'},
78
78
  }
79
79
 
80
80
  # classification method
@@ -305,7 +305,7 @@ def get_query_dict_single_depth(var, lon_min, lon_max, lat_min, lat_max, depth,
305
305
  query_dict = set_query_dict_no_vertical(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
306
306
  else:
307
307
  query_dict = set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
308
- if var in ['u', 'v', 'temp', 'salinity']:
308
+ if var in ['u', 'v', 'temp', 'salt']:
309
309
  print('Please ensure the depth is in the range of 0-5000 m')
310
310
  query_dict['vertCoord'] = get_nearest_level_index(depth)+1
311
311
  return query_dict
@@ -317,7 +317,7 @@ def get_query_dict_single_level(var, lon_min, lon_max, lat_min, lat_max, level_n
317
317
  else:
318
318
  # level_num: 1-40
319
319
  query_dict = set_query_dict_depth_or_level(var, lon_min, lon_max, lat_min, lat_max, time_str_ymdh)
320
- if var in ['u', 'v', 'temp', 'salinity']:
320
+ if var in ['u', 'v', 'temp', 'salt']:
321
321
  print('Please ensure the level_num is in the range of 1-40')
322
322
  if level_num == 0:
323
323
  level_num = 1
@@ -397,21 +397,21 @@ def get_base_url(dataset_name, version_name, var, year_str):
397
397
  elif classification_method == 'var_different':
398
398
  if var in ['u', 'v', 'u_b', 'v_b']:
399
399
  base_url = url_dict['uv3z']
400
- elif var in ['temp', 'salinity', 'temp_b', 'salinity_b']:
400
+ elif var in ['temp', 'salt', 'temp_b', 'salt_b']:
401
401
  base_url = url_dict['ts3z']
402
402
  elif var in ['ssh']:
403
403
  base_url = url_dict['ssh']
404
404
  else:
405
- print('Please ensure the var is in [u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b]')
405
+ print('Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]')
406
406
  elif classification_method == 'var_year_different':
407
407
  if var in ['u', 'v', 'u_b', 'v_b']:
408
408
  base_url = url_dict['uv3z'][str(year_str)]
409
- elif var in ['temp', 'salinity', 'temp_b', 'salinity_b']:
409
+ elif var in ['temp', 'salt', 'temp_b', 'salt_b']:
410
410
  base_url = url_dict['ts3z'][str(year_str)]
411
411
  elif var in ['ssh']:
412
412
  base_url = url_dict['ssh'][str(year_str)]
413
413
  else:
414
- print('Please ensure the var is in [u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b]')
414
+ print('Please ensure the var is in [u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b]')
415
415
  return base_url
416
416
 
417
417
 
@@ -630,7 +630,7 @@ def convert_full_name_to_short_name(full_name):
630
630
  for var, info in variable_info.items():
631
631
  if full_name == info['var_name'] or full_name == info['standard_name'] or full_name == var:
632
632
  return var
633
- print('[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salinity,ssh,u_b,v_b,temp_b,salinity_b')
633
+ print('[bold #FFE4E1]Please ensure the var is in:\n[bold blue]u,v,temp,salt,ssh,u_b,v_b,temp_b,salt_b')
634
634
  print('or')
635
635
  print('[bold blue]water_u, water_v, water_temp, salinity, surf_el, water_u_bottom, water_v_bottom, water_temp_bottom, salinity_bottom')
636
636
  return False
@@ -655,7 +655,7 @@ def download(var, time_s, time_e, lon_min=0, lon_max=359.92, lat_min=-80, lat_ma
655
655
  Download the data of single time or a series of time
656
656
 
657
657
  Parameters:
658
- var: str, the variable name, such as 'u', 'v', 'temp', 'salinity', 'ssh', 'u_b', 'v_b', 'temp_b', 'salinity_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
658
+ var: str, the variable name, such as 'u', 'v', 'temp', 'salt', 'ssh', 'u_b', 'v_b', 'temp_b', 'salt_b' or 'water_u', 'water_v', 'water_temp', 'salinity', 'surf_el', 'water_u_bottom', 'water_v_bottom', 'water_temp_bottom', 'salinity_bottom'
659
659
  time_s: str, the start time, such as '2024110100' or '20241101', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
660
660
  time_e: str, the end time, such as '2024110221' or '20241102', if add hour, the hour should be 00, 03, 06, 09, 12, 15, 18, 21
661
661
  lon_min: float, the minimum longitude, default is 0
@@ -725,11 +725,11 @@ def how_to_use():
725
725
 
726
726
  # 2. Get the base url according to the dataset, version, var and year:
727
727
  # 2.1 Dataset and version were found in step 1
728
- # 2.2 Var: u, v, temp, salinity, ssh, u_b, v_b, temp_b, salinity_b
728
+ # 2.2 Var: u, v, temp, salt, ssh, u_b, v_b, temp_b, salt_b
729
729
  # 2.3 Year: 1994-2024(current year)
730
730
 
731
731
  # 3. Get the query_dict according to the var, lon_min, lon_max, lat_min, lat_max, depth, level_num, time_str_ymdh:
732
- # 3.1 Var: u, v, temp, salinity, ssh, u_b, v_b, temp_b, salinity_b
732
+ # 3.1 Var: u, v, temp, salt, ssh, u_b, v_b, temp_b, salt_b
733
733
  # 3.2 Lon_min, lon_max, lat_min, lat_max: float
734
734
  # 3.3 Depth: 0-5000m, if you wanna get single depth data, you can set the depth
735
735
  # 3.4 Level_num: 1-40, if you wanna get single level data, you can set the level_num
@@ -772,7 +772,7 @@ if __name__ == '__main__':
772
772
  'water_v': {'simple_name': 'v', 'download': 0},
773
773
  'surf_el': {'simple_name': 'ssh', 'download': 0},
774
774
  'water_temp': {'simple_name': 'temp', 'download': 0},
775
- 'salinity': {'simple_name': 'salinity', 'download': 1},
775
+ 'salinity': {'simple_name': 'salt', 'download': 1},
776
776
  }
777
777
 
778
778
  # set depth or level, only one can be True
@@ -0,0 +1,332 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ '''
4
+ Author: Liu Kun && 16031215@qq.com
5
+ Date: 2024-11-28 10:42:56
6
+ LastEditors: Liu Kun && 16031215@qq.com
7
+ LastEditTime: 2024-11-28 10:43:18
8
+ FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\literature.py
9
+ Description:
10
+ EditPlatform: vscode
11
+ ComputerInfo: XPS 15 9510
12
+ SystemInfo: Windows 11
13
+ Python Version: 3.12
14
+ '''
15
+
16
+
17
+ import os
18
+ import random
19
+ import re
20
+ import time
21
+ from pathlib import Path
22
+
23
+ import pandas as pd
24
+ import requests
25
+ from rich import print
26
+ from rich.progress import track
27
+
28
+ __all__ = ['download5doi']
29
+
30
+
31
+ def _get_file_size(file_path, unit='KB'):
32
+ # 检查文件是否存在
33
+ if not os.path.exists(file_path):
34
+ return "文件不存在"
35
+
36
+ # 获取文件大小(字节)
37
+ file_size = os.path.getsize(file_path)
38
+
39
+ # 单位转换字典
40
+ unit_dict = {
41
+ 'PB': 1024**5,
42
+ 'TB': 1024**4,
43
+ 'GB': 1024**3,
44
+ 'MB': 1024**2,
45
+ 'KB': 1024,
46
+ }
47
+
48
+ # 检查传入的单位是否合法
49
+ if unit not in unit_dict:
50
+ return "单位不合法,请选择PB、TB、GB、MB、KB中的一个"
51
+
52
+ # 转换文件大小到指定单位
53
+ converted_size = file_size / unit_dict[unit]
54
+
55
+ return converted_size
56
+
57
+
58
+ class _Downloader:
59
+ '''
60
+ 根据doi下载文献pdf
61
+ '''
62
+
63
+ def __init__(self, doi, store_path):
64
+ self.url_list = [r'https://sci-hub.se',
65
+ r'https://sci-hub.ren',
66
+ r'https://sci-hub.st',
67
+ r'https://sci-hub.ru',
68
+ ]
69
+ self.base_url = None
70
+ self.url = None
71
+ self.doi = doi
72
+ self.pdf_url = None
73
+ self.pdf_path = None
74
+ self.headers = {'User-Agent': self.get_ua().encode('utf-8')}
75
+ # 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2.pdf
76
+ # self.fname = doi.replace(r'/', '_') + '.pdf'
77
+ self.fname = re.sub(r'[/<>:"?*|]', '_', doi) + '.pdf'
78
+ self.store_path = Path(store_path)
79
+ self.fpath = self.store_path / self.fname
80
+ self.wrong_record_file = self.store_path / 'wrong_record.txt'
81
+ self.sleep = 5
82
+ self.cookies = None
83
+ self.check_size = 50
84
+ self.url_index = 0
85
+ self.try_times_each_url_max = 3
86
+ self.try_times = 0
87
+
88
+ def get_ua(self):
89
+ ua_list = [
90
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
91
+ "Opera/8.0 (Windows NT 5.1; U; en)",
92
+ "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
93
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
94
+ "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
95
+ "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
96
+ "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
97
+ "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
98
+ "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
99
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
100
+ "Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
101
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
102
+ "MAC:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
103
+ "Windows:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
104
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
105
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
106
+ "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
107
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
108
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
109
+ "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
110
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
111
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
112
+ "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
113
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
114
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
115
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
116
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
117
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)"
118
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
119
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
120
+ "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
121
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
122
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
123
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
124
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
125
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
126
+ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
127
+ "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
128
+ "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
129
+ "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
130
+ "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
131
+ "Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
132
+ "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
133
+ "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
134
+ "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
135
+ "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
136
+ "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
137
+ "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
138
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
139
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
140
+ "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
141
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
142
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
143
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
144
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
145
+ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
146
+ "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
147
+ "Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
148
+ "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
149
+ "UCWEB7.0.2.37/28/999",
150
+ "NOKIA5700/UCWEB7.0.2.37/28/999",
151
+ "Openwave/UCWEB7.0.2.37/28/999",
152
+ "Openwave/UCWEB7.0.2.37/28/999",
153
+ ]
154
+ ua_index = random.randint(0, len(ua_list)-1)
155
+ ua = ua_list[ua_index]
156
+ return ua
157
+
158
+ def get_pdf_url(self):
159
+ print('[bold #E6E6FA]-'*100)
160
+ print(f"DOI: {self.doi}")
161
+ print(f"Requesting: {self.url}...")
162
+ response = requests.get(self.url, headers=self.headers)
163
+ if response.status_code == 200:
164
+ self.cookies = response.cookies
165
+ text = response.text.replace('\\', '')
166
+ # text = text.replace(' ', '') # It is important to remove the space
167
+ # print(text)
168
+ pattern = re.compile(
169
+ r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
170
+ match = pattern.search(text)
171
+ if match:
172
+ got_url = match.group(1)
173
+ if r'http' not in got_url:
174
+ if got_url[:2] == '//':
175
+ self.pdf_url = 'https:' + got_url
176
+ else:
177
+ self.pdf_url = self.base_url + got_url
178
+ else:
179
+ self.pdf_url = got_url
180
+ print(f"URL: {self.pdf_url}")
181
+ else:
182
+ print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
183
+ self.try_times = self.try_times_each_url_max+1
184
+ else:
185
+ print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
186
+ print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
187
+ self.try_times = self.try_times_each_url_max+1
188
+
189
+ def url_iterate(self):
190
+ if self.url_index >= len(self.url_list):
191
+ return
192
+ url = self.url_list[self.url_index]
193
+ self.base_url = url
194
+ self.url = url + '/' + self.doi
195
+ self.get_pdf_url()
196
+ # for url in self.url_list:
197
+ # self.url = url + self.doi
198
+ # self.get_pdf_url()
199
+ # if self.pdf_url:
200
+ # break
201
+
202
+ def write_wrong_record(self):
203
+ with open(self.wrong_record_file, 'a') as f:
204
+ f.write(self.doi + '\n')
205
+
206
+ def download_pdf(self):
207
+ if self.fpath.exists():
208
+ fsize = _get_file_size(self.fpath, unit='KB')
209
+ if fsize < self.check_size:
210
+ # delete the wrong file
211
+ os.remove(self.fpath)
212
+ print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
213
+ else:
214
+ print('[bold #E6E6FA]-'*100)
215
+ print(f"[bold purple]The PDF file {self.fpath} already exists.")
216
+ return
217
+ self.url_index = 0
218
+ already_downloaded = False
219
+ self.try_times = 0
220
+ while not already_downloaded:
221
+ self.url_iterate()
222
+ if not self.pdf_url:
223
+ self.url_index += 1
224
+ if self.url_index >= len(self.url_list):
225
+ print("Failed to download the PDF file.")
226
+ self.write_wrong_record()
227
+ return
228
+ else:
229
+ self.try_times = 0
230
+ continue
231
+ else:
232
+ self.try_times += 1
233
+ if self.try_times > self.try_times_each_url_max:
234
+ self.url_index += 1
235
+ if self.url_index >= len(self.url_list):
236
+ # print("Failed to download the PDF file.")
237
+ self.write_wrong_record()
238
+ return
239
+ print(f"Downloading: {self.fname}...")
240
+ try:
241
+ response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
242
+ if response.status_code == 200:
243
+ with open(self.fpath, 'wb') as f:
244
+ f.write(response.content)
245
+ fsize = _get_file_size(self.fpath, unit='KB')
246
+ if fsize < self.check_size:
247
+ # delete the wrong file
248
+ os.remove(self.fpath)
249
+ print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
250
+ else:
251
+ print(f"[bold green]Sucessful to download {self.fpath}")
252
+ already_downloaded = True
253
+ else:
254
+ self.try_times = self.try_times_each_url_max+1
255
+ print(f"Failed to download the PDF file. Status code: {response.status_code}")
256
+ print(f'[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.')
257
+ except Exception as e:
258
+ print(f"Failed to download the PDF file. Error: {e}")
259
+ time.sleep(self.sleep)
260
+ if self.try_times >= self.try_times_each_url_max:
261
+ self.url_index += 1
262
+ if self.url_index >= len(self.url_list):
263
+ print("\n[bold #CD5C5C]Failed to download the PDF file.")
264
+ self.write_wrong_record()
265
+ return
266
+ if self.try_times == self.try_times_each_url_max:
267
+ print(f'Tried {self.try_times} times for {self.url_list[self.url_index-1]}.')
268
+ print("Try another URL...")
269
+
270
+
271
def read_excel(file, col_name=r'DOI'):
    '''
    Description: Read the DOIs stored in one column of an Excel file.

    Args:
        file: str, The path of the excel file.
        col_name: str, The column name of the DOIs. Default is 'DOI'.

    Returns:
        list, the values of the column, without the cells whose text form is 'nan' (empty cells).
    '''
    column = pd.read_excel(file)[col_name]
    # Drop empty cells, which read back as nan
    return [value for value in column.tolist() if str(value) != 'nan']
277
+
278
+
279
def read_txt(file):
    '''
    Description: Read DOIs from a text file, one DOI per line.

    Args:
        file: str, The path of the txt file.

    Returns:
        list, the non-empty lines with surrounding whitespace removed.
    '''
    # 'utf-8-sig' reads plain ASCII/UTF-8 and also drops a leading byte-order mark,
    # which would otherwise stay attached to the first DOI (str.strip does not remove it).
    with open(file, 'r', encoding='utf-8-sig') as f:
        stripped = (line.strip() for line in f)
        # Skip blank lines
        return [line for line in stripped if line]
285
+
286
+
287
def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r'DOI'):
    r'''
    Description: Download PDF files by DOI.

    The DOIs are taken from exactly one source, in this order of precedence:
    excel_file, then txt_file, then doi_list. When no DOI is found a notice is
    printed and nothing is downloaded.

    Args:
        store_path: str, The path to store the PDF files. It is created when missing. Default is the current working directory.
        doi_list: list or str, The list of DOIs.
        txt_file: str, The path of the txt file that contains the DOIs.
        excel_file: str, The path of the excel file that contains the DOIs.
        col_name: str, The column name of the DOIs in the excel file. Default is 'DOI'.

    Returns:
        None

    Example:
        download5doi(doi_list='10.3389/feart.2021.698876')
        download5doi(store_path=r'I:\Delete\ref_pdf', doi_list='10.3389/feart.2021.698876')
        download5doi(store_path=r'I:\Delete\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
        download5doi(store_path=r'I:\Delete\ref_pdf', txt_file=r'I:\Delete\ref_pdf\wrong_record.txt')
        download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
        download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
    '''
    if not store_path:
        store_path = Path.cwd()
    else:
        store_path = Path(str(store_path))
        store_path.mkdir(parents=True, exist_ok=True)
    store_path = str(store_path)

    if excel_file:
        dois = read_excel(excel_file, col_name)
    elif txt_file:
        dois = read_txt(txt_file)
    elif doi_list is None:
        dois = []
    elif isinstance(doi_list, str):
        # A single DOI given as str is wrapped into a list
        dois = [doi_list] if doi_list else []
    else:
        # Materialise any other iterable so that it can be counted below
        dois = list(doi_list)

    if not dois:
        # Previously len(None) raised a TypeError here
        print('[bold yellow]No DOI was provided, nothing to download.')
        return

    print(f"Downloading {len(dois)} PDF files...")
    for doi in track(dois, description='Downloading...'):
        download = _Downloader(doi, store_path)
        download.download_pdf()
327
+
328
+
329
if __name__ == '__main__':
    # Manual run: download every DOI listed in a local Excel export
    store_path = r'I:\Delete\ref_pdf'
    excel_records = r'I:\Delete\ref_pdf\savedrecs.xls'
    # Single-DOI variant:
    # download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
    download5doi(store_path, excel_file=excel_records)
@@ -305,6 +305,8 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
305
305
  download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
306
306
  download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
307
307
  '''
308
+ print('[bold #EE33fA]Note:\n 升级0.0.62版本后,函数路径将改为oafuncs.oa_down.literature.download5doi,此路径将被弃用。')
309
+
308
310
  if not store_path:
309
311
  store_path = Path.cwd()
310
312
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: oafuncs
3
- Version: 0.0.61
3
+ Version: 0.0.63
4
4
  Summary: My short description for my project.
5
5
  Home-page: https://github.com/Industry-Pays/OAFuncs
6
6
  Author: Kun Liu
@@ -144,7 +144,7 @@ oafuncs.oa_nc.write2nc(r'I:\test.nc', data,
144
144
 
145
145
  方便获取时间序列,间隔为hour
146
146
 
147
- - refs_pdf
147
+ - literature
148
148
 
149
149
  - download5doi
150
150
 
@@ -6,17 +6,18 @@ oafuncs/oa_file.py,sha256=rUfxlks9uR5B9_TBPJFnNswfDF0lRm_ZBDUwIjYtf9U,10250
6
6
  oafuncs/oa_help.py,sha256=ppNktmtNzs15R20MD1bM7yImlTQ_ngMwvoIglePOKXA,1000
7
7
  oafuncs/oa_nc.py,sha256=ALAYfqDy5lbUNJsTU29j6ZWkM4wgqQU3p2Fxn5pkvsQ,12102
8
8
  oafuncs/oa_python.py,sha256=XPTP3o7zTFzfJR_YhsKfQksa3bSYwXsne9YxlJplCEA,3994
9
- oafuncs/oa_down/__init__.py,sha256=s-XGVJ4z5m3pwIJGfifvkbSt_VFVqtrfFvP7fJxbs04,538
10
- oafuncs/oa_down/hycom_3hourly.py,sha256=nagKxShJOaDL0H7BcBSFkZ-OZeMAlirpiGgKXjyPTAA,43996
11
- oafuncs/oa_down/refs_pdf.py,sha256=g-XY5hBPRm15QT-dCyDt--vBCXyQ1cJ1VP_vb2_lCfE,17829
9
+ oafuncs/oa_down/__init__.py,sha256=a6rgxHQi8spvlI-TaVEqnrDNhYsKm5_IQf7ckAZ8U4w,603
10
+ oafuncs/oa_down/hycom_3hourly.py,sha256=5ssqWxIpmCF77ojguxUem2zGULTPmPKZjqPTgne29fg,43912
11
+ oafuncs/oa_down/literature.py,sha256=dT3-7-beEzQ9mTP8LNV9Gf3q5Z1Pqqjc6FOS010HZeQ,17833
12
+ oafuncs/oa_down/refs_pdf.py,sha256=wr1sIYO2FUBJTn-K79NylbRUvCAFaee6_fQljxSI_Q8,17979
12
13
  oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
13
14
  oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
14
15
  oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
15
16
  oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
16
17
  oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
17
18
  oafuncs/oa_tool/email.py,sha256=7EX3VkD8TxYAKPLOuC_yS104p9zbKilZlGfkVa2C6BQ,2947
18
- oafuncs-0.0.61.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
19
- oafuncs-0.0.61.dist-info/METADATA,sha256=Vkq1QP3YE6PKv61eVEyGVE0IBPNO2dHVo3b7da0DFcs,22376
20
- oafuncs-0.0.61.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
21
- oafuncs-0.0.61.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
22
- oafuncs-0.0.61.dist-info/RECORD,,
19
+ oafuncs-0.0.63.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
20
+ oafuncs-0.0.63.dist-info/METADATA,sha256=c0kTonzu73DG0FabuV9DU_7sRnKBws4NPDq4IQhUPAY,22378
21
+ oafuncs-0.0.63.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
22
+ oafuncs-0.0.63.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
23
+ oafuncs-0.0.63.dist-info/RECORD,,