oafuncs 0.0.88__py2.py3-none-any.whl → 0.0.90__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oafuncs/data_store/OAFuncs.png +0 -0
- oafuncs/oa_data.py +1 -0
- oafuncs/oa_down/hycom_3hourly.py +213 -345
- oafuncs/oa_down/literature.py +57 -126
- oafuncs/oa_down/user_agent.py +0 -3
- oafuncs/oa_draw.py +1 -0
- oafuncs/oa_file.py +23 -7
- oafuncs/oa_help.py +3 -2
- oafuncs/oa_nc.py +1 -0
- oafuncs/oa_python.py +1 -0
- {oafuncs-0.0.88.dist-info → oafuncs-0.0.90.dist-info}/METADATA +9 -10
- oafuncs-0.0.90.dist-info/RECORD +26 -0
- oafuncs-0.0.88.dist-info/RECORD +0 -26
- {oafuncs-0.0.88.dist-info → oafuncs-0.0.90.dist-info}/LICENSE.txt +0 -0
- {oafuncs-0.0.88.dist-info → oafuncs-0.0.90.dist-info}/WHEEL +0 -0
- {oafuncs-0.0.88.dist-info → oafuncs-0.0.90.dist-info}/top_level.txt +0 -0
oafuncs/oa_down/literature.py
CHANGED
@@ -1,21 +1,19 @@
|
|
1
1
|
#!/usr/bin/env python
|
2
2
|
# coding=utf-8
|
3
|
-
|
3
|
+
"""
|
4
4
|
Author: Liu Kun && 16031215@qq.com
|
5
5
|
Date: 2024-11-28 10:42:56
|
6
6
|
LastEditors: Liu Kun && 16031215@qq.com
|
7
|
-
LastEditTime:
|
7
|
+
LastEditTime: 2025-01-05 10:51:42
|
8
8
|
FilePath: \\Python\\My_Funcs\\OAFuncs\\oafuncs\\oa_down\\literature.py
|
9
|
-
Description:
|
9
|
+
Description:
|
10
10
|
EditPlatform: vscode
|
11
11
|
ComputerInfo: XPS 15 9510
|
12
12
|
SystemInfo: Windows 11
|
13
13
|
Python Version: 3.12
|
14
|
-
|
15
|
-
|
14
|
+
"""
|
16
15
|
|
17
16
|
import os
|
18
|
-
import random
|
19
17
|
import re
|
20
18
|
import time
|
21
19
|
from pathlib import Path
|
@@ -24,11 +22,12 @@ import pandas as pd
|
|
24
22
|
import requests
|
25
23
|
from rich import print
|
26
24
|
from rich.progress import track
|
25
|
+
from oafuncs.oa_down.user_agent import get_ua
|
27
26
|
|
28
|
-
__all__ = [
|
27
|
+
__all__ = ["download5doi"]
|
29
28
|
|
30
29
|
|
31
|
-
def _get_file_size(file_path, unit='KB'):
|
30
|
+
def _get_file_size(file_path, unit="KB"):
|
32
31
|
# 检查文件是否存在
|
33
32
|
if not os.path.exists(file_path):
|
34
33
|
return "文件不存在"
|
@@ -38,11 +37,11 @@ def _get_file_size(file_path, unit='KB'):
|
|
38
37
|
|
39
38
|
# 单位转换字典
|
40
39
|
unit_dict = {
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
40
|
+
"PB": 1024**5,
|
41
|
+
"TB": 1024**4,
|
42
|
+
"GB": 1024**3,
|
43
|
+
"MB": 1024**2,
|
44
|
+
"KB": 1024,
|
46
45
|
}
|
47
46
|
|
48
47
|
# 检查传入的单位是否合法
|
@@ -56,28 +55,29 @@ def _get_file_size(file_path, unit='KB'):
|
|
56
55
|
|
57
56
|
|
58
57
|
class _Downloader:
|
59
|
-
|
58
|
+
"""
|
60
59
|
根据doi下载文献pdf
|
61
|
-
|
60
|
+
"""
|
62
61
|
|
63
62
|
def __init__(self, doi, store_path):
|
64
|
-
self.url_list = [
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
63
|
+
self.url_list = [
|
64
|
+
r"https://sci-hub.se",
|
65
|
+
r"https://sci-hub.ren",
|
66
|
+
r"https://sci-hub.st",
|
67
|
+
r"https://sci-hub.ru",
|
68
|
+
]
|
69
69
|
self.base_url = None
|
70
70
|
self.url = None
|
71
71
|
self.doi = doi
|
72
72
|
self.pdf_url = None
|
73
73
|
self.pdf_path = None
|
74
|
-
self.headers = {
|
74
|
+
self.headers = {"User-Agent": get_ua().encode("utf-8")}
|
75
75
|
# 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2.pdf
|
76
76
|
# self.fname = doi.replace(r'/', '_') + '.pdf'
|
77
|
-
self.fname = re.sub(r'[/<>:"?*|]',
|
77
|
+
self.fname = re.sub(r'[/<>:"?*|]', "_", doi) + ".pdf"
|
78
78
|
self.store_path = Path(store_path)
|
79
79
|
self.fpath = self.store_path / self.fname
|
80
|
-
self.wrong_record_file = self.store_path /
|
80
|
+
self.wrong_record_file = self.store_path / "wrong_record.txt"
|
81
81
|
self.sleep = 5
|
82
82
|
self.cookies = None
|
83
83
|
self.check_size = 50
|
@@ -85,113 +85,42 @@ class _Downloader:
|
|
85
85
|
self.try_times_each_url_max = 3
|
86
86
|
self.try_times = 0
|
87
87
|
|
88
|
-
def get_ua(self):
|
89
|
-
ua_list = [
|
90
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60",
|
91
|
-
"Opera/8.0 (Windows NT 5.1; U; en)",
|
92
|
-
"Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50",
|
93
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; en) Opera 9.50",
|
94
|
-
"Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
|
95
|
-
"Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
|
96
|
-
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
97
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
|
98
|
-
"Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
|
99
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
100
|
-
"Mozilla/5.0 (Windows NT 6.1; rv,2.0.1) Gecko/20100101 Firefox/4.0.1",
|
101
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2",
|
102
|
-
"MAC:Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
|
103
|
-
"Windows:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
|
104
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
105
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
106
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
107
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36",
|
108
|
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11",
|
109
|
-
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
110
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
111
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36",
|
112
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
|
113
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
|
114
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
|
115
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
|
116
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
|
117
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E; LBBROWSER)"
|
118
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)",
|
119
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)",
|
120
|
-
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0",
|
121
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SV1; QQDownload 732; .NET4.0C; .NET4.0E; SE 2.X MetaSr 1.0)",
|
122
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
|
123
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36",
|
124
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
125
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 UBrowser/4.0.3214.0 Safari/537.36",
|
126
|
-
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.2.4094.1 Safari/537.36",
|
127
|
-
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
128
|
-
"Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
129
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
|
130
|
-
"Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
|
131
|
-
"Mozilla/5.0 (Linux; U; Android 2.2.1; zh-cn; HTC_Wildfire_A3333 Build/FRG83D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
132
|
-
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
133
|
-
"MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
134
|
-
"Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
|
135
|
-
"Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
|
136
|
-
"Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
|
137
|
-
"Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
|
138
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
|
139
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
|
140
|
-
"Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
|
141
|
-
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
|
142
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
|
143
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
|
144
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; TencentTraveler 4.0)",
|
145
|
-
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
|
146
|
-
"Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
147
|
-
"Mozilla/5.0 (SymbianOS/9.4; Series60/5.0 NokiaN97-1/20.0.019; Profile/MIDP-2.1 Configuration/CLDC-1.1) AppleWebKit/525 (KHTML, like Gecko) BrowserNG/7.1.18124",
|
148
|
-
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; HTC; Titan)",
|
149
|
-
"UCWEB7.0.2.37/28/999",
|
150
|
-
"NOKIA5700/UCWEB7.0.2.37/28/999",
|
151
|
-
"Openwave/UCWEB7.0.2.37/28/999",
|
152
|
-
"Openwave/UCWEB7.0.2.37/28/999",
|
153
|
-
]
|
154
|
-
ua_index = random.randint(0, len(ua_list)-1)
|
155
|
-
ua = ua_list[ua_index]
|
156
|
-
return ua
|
157
|
-
|
158
88
|
def get_pdf_url(self):
|
159
|
-
print(
|
89
|
+
print("[bold #E6E6FA]-" * 100)
|
160
90
|
print(f"DOI: {self.doi}")
|
161
91
|
print(f"Requesting: {self.url}...")
|
162
92
|
response = requests.get(self.url, headers=self.headers)
|
163
93
|
if response.status_code == 200:
|
164
94
|
self.cookies = response.cookies
|
165
|
-
text = response.text.replace(
|
95
|
+
text = response.text.replace("\\", "")
|
166
96
|
# text = text.replace(' ', '') # It is important to remove the space
|
167
97
|
# print(text)
|
168
|
-
pattern = re.compile(
|
169
|
-
r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
|
98
|
+
pattern = re.compile(r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
|
170
99
|
match = pattern.search(text)
|
171
100
|
if match:
|
172
101
|
got_url = match.group(1)
|
173
|
-
if r
|
174
|
-
if got_url[:2] ==
|
175
|
-
self.pdf_url =
|
102
|
+
if r"http" not in got_url:
|
103
|
+
if got_url[:2] == "//":
|
104
|
+
self.pdf_url = "https:" + got_url
|
176
105
|
else:
|
177
106
|
self.pdf_url = self.base_url + got_url
|
178
107
|
else:
|
179
108
|
self.pdf_url = got_url
|
180
109
|
print(f"URL: {self.pdf_url}")
|
181
110
|
else:
|
182
|
-
print(f
|
183
|
-
self.try_times = self.try_times_each_url_max+1
|
111
|
+
print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
|
112
|
+
self.try_times = self.try_times_each_url_max + 1
|
184
113
|
else:
|
185
114
|
print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
|
186
|
-
print(f
|
187
|
-
self.try_times = self.try_times_each_url_max+1
|
115
|
+
print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
|
116
|
+
self.try_times = self.try_times_each_url_max + 1
|
188
117
|
|
189
118
|
def url_iterate(self):
|
190
119
|
if self.url_index >= len(self.url_list):
|
191
120
|
return
|
192
121
|
url = self.url_list[self.url_index]
|
193
122
|
self.base_url = url
|
194
|
-
self.url = url +
|
123
|
+
self.url = url + "/" + self.doi
|
195
124
|
self.get_pdf_url()
|
196
125
|
# for url in self.url_list:
|
197
126
|
# self.url = url + self.doi
|
@@ -200,18 +129,18 @@ class _Downloader:
|
|
200
129
|
# break
|
201
130
|
|
202
131
|
def write_wrong_record(self):
|
203
|
-
with open(self.wrong_record_file,
|
204
|
-
f.write(self.doi +
|
132
|
+
with open(self.wrong_record_file, "a") as f:
|
133
|
+
f.write(self.doi + "\n")
|
205
134
|
|
206
135
|
def download_pdf(self):
|
207
136
|
if self.fpath.exists():
|
208
|
-
fsize = _get_file_size(self.fpath, unit=
|
137
|
+
fsize = _get_file_size(self.fpath, unit="KB")
|
209
138
|
if fsize < self.check_size:
|
210
139
|
# delete the wrong file
|
211
140
|
os.remove(self.fpath)
|
212
141
|
print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retry.")
|
213
142
|
else:
|
214
|
-
print(
|
143
|
+
print("[bold #E6E6FA]-" * 100)
|
215
144
|
print(f"[bold purple]The PDF file {self.fpath} already exists.")
|
216
145
|
return
|
217
146
|
self.url_index = 0
|
@@ -240,9 +169,9 @@ class _Downloader:
|
|
240
169
|
try:
|
241
170
|
response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
|
242
171
|
if response.status_code == 200:
|
243
|
-
with open(self.fpath,
|
172
|
+
with open(self.fpath, "wb") as f:
|
244
173
|
f.write(response.content)
|
245
|
-
fsize = _get_file_size(self.fpath, unit=
|
174
|
+
fsize = _get_file_size(self.fpath, unit="KB")
|
246
175
|
if fsize < self.check_size:
|
247
176
|
# delete the wrong file
|
248
177
|
os.remove(self.fpath)
|
@@ -251,9 +180,9 @@ class _Downloader:
|
|
251
180
|
print(f"[bold green]Sucessful to download {self.fpath}")
|
252
181
|
already_downloaded = True
|
253
182
|
else:
|
254
|
-
self.try_times = self.try_times_each_url_max+1
|
183
|
+
self.try_times = self.try_times_each_url_max + 1
|
255
184
|
print(f"Failed to download the PDF file. Status code: {response.status_code}")
|
256
|
-
print(f
|
185
|
+
print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} do not inlcude the PDF file.")
|
257
186
|
except Exception as e:
|
258
187
|
print(f"Failed to download the PDF file. Error: {e}")
|
259
188
|
time.sleep(self.sleep)
|
@@ -264,31 +193,32 @@ class _Downloader:
|
|
264
193
|
self.write_wrong_record()
|
265
194
|
return
|
266
195
|
if self.try_times == self.try_times_each_url_max:
|
267
|
-
print(f
|
196
|
+
print(f"Tried {self.try_times} times for {self.url_list[self.url_index-1]}.")
|
268
197
|
print("Try another URL...")
|
269
198
|
|
270
199
|
|
271
|
-
def read_excel(file, col_name=r
|
200
|
+
def read_excel(file, col_name=r"DOI"):
|
272
201
|
df = pd.read_excel(file)
|
273
202
|
df_list = df[col_name].tolist()
|
274
203
|
# 去掉nan
|
275
|
-
df_list = [doi for doi in df_list if str(doi) !=
|
204
|
+
df_list = [doi for doi in df_list if str(doi) != "nan"]
|
276
205
|
return df_list
|
277
206
|
|
278
207
|
|
279
208
|
def read_txt(file):
|
280
|
-
with open(file,
|
209
|
+
with open(file, "r") as f:
|
281
210
|
lines = f.readlines()
|
282
211
|
# 去掉换行符以及空行
|
283
212
|
lines = [line.strip() for line in lines if line.strip()]
|
284
213
|
return lines
|
285
214
|
|
286
215
|
|
287
|
-
def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r
|
288
|
-
|
289
|
-
Description:
|
216
|
+
def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r"DOI"):
|
217
|
+
"""
|
218
|
+
Description:
|
219
|
+
Download PDF files by DOI.
|
290
220
|
|
291
|
-
|
221
|
+
Parameters:
|
292
222
|
store_path: str, The path to store the PDF files.
|
293
223
|
doi_list: list or str, The list of DOIs.
|
294
224
|
txt_file: str, The path of the txt file that contains the DOIs.
|
@@ -305,7 +235,7 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
|
|
305
235
|
download5doi(store_path=r'I:\Delete\ref_pdf', txt_file=r'I:\Delete\ref_pdf\wrong_record.txt')
|
306
236
|
download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx')
|
307
237
|
download5doi(store_path=r'I:\Delete\ref_pdf', excel_file=r'I:\Delete\ref_pdf\wrong_record.xlsx', col_name='DOI')
|
308
|
-
|
238
|
+
"""
|
309
239
|
if not store_path:
|
310
240
|
store_path = Path.cwd()
|
311
241
|
else:
|
@@ -321,12 +251,13 @@ def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None,
|
|
321
251
|
if excel_file:
|
322
252
|
doi_list = read_excel(excel_file, col_name)
|
323
253
|
print(f"Downloading {len(doi_list)} PDF files...")
|
324
|
-
for doi in track(doi_list, description=
|
254
|
+
for doi in track(doi_list, description="Downloading..."):
|
325
255
|
download = _Downloader(doi, store_path)
|
326
256
|
download.download_pdf()
|
327
257
|
|
328
258
|
|
329
|
-
if __name__ ==
|
330
|
-
store_path = r
|
259
|
+
if __name__ == "__main__":
|
260
|
+
store_path = r"I:\Delete\ref_pdf"
|
261
|
+
excel_file = r"I:\Delete\Ref_DA_ROMS\savedrecs.xls"
|
331
262
|
# download5doi(store_path, doi_list='10.1007/s00382-022-06260-x')
|
332
|
-
download5doi(store_path, excel_file=
|
263
|
+
download5doi(store_path, excel_file=excel_file)
|
oafuncs/oa_down/user_agent.py
CHANGED
oafuncs/oa_draw.py
CHANGED
@@ -23,6 +23,7 @@ import matplotlib.pyplot as plt
|
|
23
23
|
import numpy as np
|
24
24
|
import xarray as xr
|
25
25
|
from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter
|
26
|
+
from rich import print
|
26
27
|
|
27
28
|
__all__ = ["fig_minus", "gif", "add_cartopy", "add_gridlines", "MidpointNormalize", "add_lonlat_unit", "contour", "contourf", "quiver"]
|
28
29
|
|
oafuncs/oa_file.py
CHANGED
@@ -17,6 +17,7 @@ import glob
|
|
17
17
|
import os
|
18
18
|
import re
|
19
19
|
import shutil
|
20
|
+
from rich import print
|
20
21
|
|
21
22
|
__all__ = ["find_file", "link_file", "copy_file", "rename_file", "make_folder", "clear_folder", "remove_empty_folder", "remove", "file_size"]
|
22
23
|
|
@@ -325,16 +326,31 @@ def file_size(file_path, unit="KB"):
|
|
325
326
|
|
326
327
|
|
327
328
|
# ** 计算文件夹下指定相关文件的平均大小
|
328
|
-
def mean_size(parent_path,fname):
|
329
|
+
def mean_size(parent_path,fname,max_num=None,unit="KB"):
|
330
|
+
"""
|
331
|
+
Description:
|
332
|
+
Calculate the average size of the specified related files in the folder
|
333
|
+
|
334
|
+
Parameters:
|
335
|
+
parent_path: The parent path where the files are located
|
336
|
+
fname: The file name pattern to search for
|
337
|
+
max_num: The maximum number of files to search for
|
338
|
+
unit: The unit of the file size, default is "KB"
|
339
|
+
|
340
|
+
Returns:
|
341
|
+
The average size
|
342
|
+
"""
|
329
343
|
flist = find_file(parent_path, fname)
|
330
344
|
if flist:
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
345
|
+
if max_num:
|
346
|
+
flist = flist[:int(max_num)]
|
347
|
+
size_list = [file_size(f,unit) for f in flist if file_size(f,unit) > 0]
|
348
|
+
if size_list:
|
349
|
+
return sum(size_list) / len(size_list)
|
350
|
+
else:
|
351
|
+
return 0.0
|
335
352
|
else:
|
336
|
-
|
337
|
-
return mean_size, min_size, max_size
|
353
|
+
return 0.0
|
338
354
|
|
339
355
|
|
340
356
|
if __name__ == "__main__":
|
oafuncs/oa_help.py
CHANGED
oafuncs/oa_nc.py
CHANGED
oafuncs/oa_python.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: oafuncs
|
3
|
-
Version: 0.0.88
|
3
|
+
Version: 0.0.90
|
4
4
|
Summary: Oceanic and Atmospheric Functions
|
5
5
|
Home-page: https://github.com/Industry-Pays/OAFuncs
|
6
6
|
Author: Kun Liu
|
@@ -9,28 +9,27 @@ License: MIT
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
10
10
|
Classifier: Programming Language :: Python
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
12
|
-
Classifier: Programming Language :: Python :: 3.7
|
13
|
-
Classifier: Programming Language :: Python :: 3.8
|
14
12
|
Classifier: Programming Language :: Python :: 3.9
|
15
13
|
Classifier: Programming Language :: Python :: 3.10
|
16
14
|
Classifier: Programming Language :: Python :: 3.11
|
17
15
|
Classifier: Programming Language :: Python :: 3.12
|
18
16
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
19
17
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
20
|
-
Requires-Python: >=3.
|
18
|
+
Requires-Python: >=3.9.0
|
21
19
|
Description-Content-Type: text/markdown
|
22
20
|
License-File: LICENSE.txt
|
23
|
-
Requires-Dist: matplotlib
|
24
21
|
Requires-Dist: numpy
|
25
22
|
Requires-Dist: scipy
|
23
|
+
Requires-Dist: pandas
|
26
24
|
Requires-Dist: xarray
|
27
|
-
Requires-Dist: Cartopy
|
28
|
-
Requires-Dist: netCDF4
|
29
|
-
Requires-Dist: requests
|
30
|
-
Requires-Dist: bs4
|
31
25
|
Requires-Dist: rich
|
32
26
|
Requires-Dist: pathlib
|
33
|
-
Requires-Dist:
|
27
|
+
Requires-Dist: requests
|
28
|
+
Requires-Dist: bs4
|
29
|
+
Requires-Dist: matplotlib
|
30
|
+
Requires-Dist: Cartopy
|
31
|
+
Requires-Dist: netCDF4
|
32
|
+
Requires-Dist: xlrd
|
34
33
|
|
35
34
|
|
36
35
|
# oafuncs
|
@@ -0,0 +1,26 @@
|
|
1
|
+
oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
|
2
|
+
oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
|
3
|
+
oafuncs/oa_data.py,sha256=21HC_7GVFAtU9AMYKGSSzY9J6_0Ju-5n8dJKwOOx5HI,15641
|
4
|
+
oafuncs/oa_draw.py,sha256=QypQp4vJIrbAyFddEVxd9K9Q4d85PRYqYQi9xDUmSZw,11150
|
5
|
+
oafuncs/oa_file.py,sha256=9b2uXTOqJqds5IhEqA_702G-qzyCZiguGY5JcT9CZ78,12728
|
6
|
+
oafuncs/oa_help.py,sha256=42xvmv6BSTyrKfQtW0bvedyv6ElhFJLMblq5jhziuB4,4076
|
7
|
+
oafuncs/oa_nc.py,sha256=m_80xWzoyY2niupfpTSvej1D_k4WvTnDYlnlYbIfqGI,17525
|
8
|
+
oafuncs/oa_python.py,sha256=Q-6UGGw_dJff7Ef8i87fsLPoGeHV5jBzfb-7HP4THR0,4018
|
9
|
+
oafuncs/data_store/OAFuncs.png,sha256=HZORbnBSRX0MZSLTGAZAPK24RBUTmihguMeG9YiU_So,3261697
|
10
|
+
oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
|
11
|
+
oafuncs/oa_down/__init__.py,sha256=pKPqxD0z09NEXWCemuemfgTct7Kcu3APPJqqB1FPXRM,565
|
12
|
+
oafuncs/oa_down/hycom_3hourly.py,sha256=Bt4MjcshhAyDckfFvdqxjNvzU7JuBVYCwvY8b1OPbPw,59501
|
13
|
+
oafuncs/oa_down/literature.py,sha256=Txv1YGSG-Z7m4o7FGHvXOR40EFxYozMsyM0-gy5CMEg,10086
|
14
|
+
oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
|
15
|
+
oafuncs/oa_down/user_agent.py,sha256=TsPcAxFmMTYAEHRFjurI1bQBJfDhcA70MdHoUPwQmks,785
|
16
|
+
oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
|
17
|
+
oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
|
18
|
+
oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
|
19
|
+
oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
|
20
|
+
oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
|
21
|
+
oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
|
22
|
+
oafuncs-0.0.90.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
|
23
|
+
oafuncs-0.0.90.dist-info/METADATA,sha256=s3X6lHw6yv20rd2528K-5cOk7zcYRGSIGYEg4SeIqqI,3321
|
24
|
+
oafuncs-0.0.90.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
|
25
|
+
oafuncs-0.0.90.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
|
26
|
+
oafuncs-0.0.90.dist-info/RECORD,,
|
oafuncs-0.0.88.dist-info/RECORD
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
oafuncs/__init__.py,sha256=glcIlhQ9xSK4WtL58dq7Od2S3JPqsuEyhUQ-VWO8hOc,1426
|
2
|
-
oafuncs/oa_cmap.py,sha256=azVg9QR_IlG9lXCCXXVs1LS1kFci8yjxDmb_VA_TdTQ,7408
|
3
|
-
oafuncs/oa_data.py,sha256=2MkWCsESVPjZPnLnbQIwrBJUhcF9x-WXbZhni3I44ss,15617
|
4
|
-
oafuncs/oa_draw.py,sha256=PUk8DdGBUapCXrOVQ-d_DD6EccB_eHAX3r6jwrnQTk8,11126
|
5
|
-
oafuncs/oa_file.py,sha256=-AdRnFEtRTpIMXr5geYe5fEhyiMOFad5ethi_lF-Ogo,12277
|
6
|
-
oafuncs/oa_help.py,sha256=DsX6rFPZ-ZMaexcyCXeGeB3h1zPScY6ijH-uLNVWidA,3988
|
7
|
-
oafuncs/oa_nc.py,sha256=b8pbiPSRDi7Te42UcnR0AlW1sQEVtn5VU9ERiAdX-yY,17501
|
8
|
-
oafuncs/oa_python.py,sha256=XPTP3o7zTFzfJR_YhsKfQksa3bSYwXsne9YxlJplCEA,3994
|
9
|
-
oafuncs/data_store/OAFuncs.png,sha256=w2pR7MUyeeWrT8BVTSy40EIRDIrfpdo1QvWvvjLOgjM,3258809
|
10
|
-
oafuncs/oa_down/User_Agent-list.txt,sha256=pazxSip8_lphEBOPHG902zmIBUg8sBKXgmqp_g6j_E4,661062
|
11
|
-
oafuncs/oa_down/__init__.py,sha256=pKPqxD0z09NEXWCemuemfgTct7Kcu3APPJqqB1FPXRM,565
|
12
|
-
oafuncs/oa_down/hycom_3hourly.py,sha256=wYOZrV5mZVSTfx6a5QLay8Jg2sL2x3sWKWmxwidRzMo,62928
|
13
|
-
oafuncs/oa_down/literature.py,sha256=dT3-7-beEzQ9mTP8LNV9Gf3q5Z1Pqqjc6FOS010HZeQ,17833
|
14
|
-
oafuncs/oa_down/test_ua.py,sha256=0IQq3NjqfNr7KkyjS_U-a4mYu-r-E7gzawwo4IfEa6Y,10851
|
15
|
-
oafuncs/oa_down/user_agent.py,sha256=NSJjB2LAqPKS0hErvN5FfwsQeb58XjlOc68emiAK66c,893
|
16
|
-
oafuncs/oa_sign/__init__.py,sha256=QKqTFrJDFK40C5uvk48GlRRbGFzO40rgkYwu6dYxatM,563
|
17
|
-
oafuncs/oa_sign/meteorological.py,sha256=mLbupsZSq427HTfVbZMvIlFzDHwSzQAbK3X19o8anFY,6525
|
18
|
-
oafuncs/oa_sign/ocean.py,sha256=xrW-rWD7xBWsB5PuCyEwQ1Q_RDKq2KCLz-LOONHgldU,5932
|
19
|
-
oafuncs/oa_sign/scientific.py,sha256=a4JxOBgm9vzNZKpJ_GQIQf7cokkraV5nh23HGbmTYKw,5064
|
20
|
-
oafuncs/oa_tool/__init__.py,sha256=IKOlqpWlb4cMDCtq2VKR_RTxQHDNqR_vfqqsOsp_lKQ,466
|
21
|
-
oafuncs/oa_tool/email.py,sha256=4lJxV_KUzhxgLYfVwYTqp0qxRugD7fvsZkXDe5WkUKo,3052
|
22
|
-
oafuncs-0.0.88.dist-info/LICENSE.txt,sha256=rMtLpVg8sKiSlwClfR9w_Dd_5WubTQgoOzE2PDFxzs4,1074
|
23
|
-
oafuncs-0.0.88.dist-info/METADATA,sha256=FdhoLgCze7Aq-k7QCedDga-gUjZSGKGD2og_CZJ8NCs,3402
|
24
|
-
oafuncs-0.0.88.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
|
25
|
-
oafuncs-0.0.88.dist-info/top_level.txt,sha256=bgC35QkXbN4EmPHEveg_xGIZ5i9NNPYWqtJqaKqTPsQ,8
|
26
|
-
oafuncs-0.0.88.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|