oafuncs 0.0.98.45__tar.gz → 0.0.98.46__tar.gz

This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (54)
  1. {oafuncs-0.0.98.45/oafuncs.egg-info → oafuncs-0.0.98.46}/PKG-INFO +1 -1
  2. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_cmap.py +3 -0
  3. oafuncs-0.0.98.46/oafuncs/oa_down/literature.py +497 -0
  4. oafuncs-0.0.98.46/oafuncs/oa_linux.py +108 -0
  5. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46/oafuncs.egg-info}/PKG-INFO +1 -1
  6. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/setup.py +1 -1
  7. oafuncs-0.0.98.45/oafuncs/oa_down/literature.py +0 -273
  8. oafuncs-0.0.98.45/oafuncs/oa_linux.py +0 -53
  9. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/LICENSE.txt +0 -0
  10. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/MANIFEST.in +0 -0
  11. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/README.md +0 -0
  12. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/__init__.py +0 -0
  13. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_data/hycom.png +0 -0
  14. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_data/oafuncs.png +0 -0
  15. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/cprogressbar.py +0 -0
  16. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/data_interp.py +0 -0
  17. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/email.py +0 -0
  18. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/netcdf_merge.py +0 -0
  19. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/netcdf_modify.py +0 -0
  20. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/netcdf_write.py +0 -0
  21. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/parallel.py +0 -0
  22. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/parallel_bak.py +0 -0
  23. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/plot_dataset.py +0 -0
  24. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/_script/replace_file_content.py +0 -0
  25. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_data.py +0 -0
  26. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_date.py +0 -0
  27. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/User_Agent-list.txt +0 -0
  28. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/__init__.py +0 -0
  29. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/hycom_3hourly.py +0 -0
  30. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/idm.py +0 -0
  31. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/read_proxy.py +0 -0
  32. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/test_ua.py +0 -0
  33. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_down/user_agent.py +0 -0
  34. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_draw.py +0 -0
  35. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_file.py +0 -0
  36. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_geo.py +0 -0
  37. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_help.py +0 -0
  38. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_model/__init__.py +0 -0
  39. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_model/roms/__init__.py +0 -0
  40. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_model/roms/test.py +0 -0
  41. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_model/wrf/__init__.py +0 -0
  42. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_model/wrf/little_r.py +0 -0
  43. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_nc.py +0 -0
  44. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_python.py +0 -0
  45. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_sign/__init__.py +0 -0
  46. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_sign/meteorological.py +0 -0
  47. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_sign/ocean.py +0 -0
  48. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_sign/scientific.py +0 -0
  49. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_tool.py +0 -0
  50. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs.egg-info/SOURCES.txt +0 -0
  51. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs.egg-info/dependency_links.txt +0 -0
  52. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs.egg-info/requires.txt +0 -0
  53. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs.egg-info/top_level.txt +0 -0
  54. {oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/setup.cfg +0 -0
{oafuncs-0.0.98.45/oafuncs.egg-info → oafuncs-0.0.98.46}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oafuncs
-Version: 0.0.98.45
+Version: 0.0.98.46
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu
{oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/oafuncs/oa_cmap.py
@@ -271,6 +271,9 @@ def get(colormap_name: Optional[str] = None, show_available: bool = False) -> Op
         "diverging_4": ["#5DADE2", "#A2D9F7", "#D6EAF8", "#F2F3F4", "#FADBD8", "#F1948A", "#E74C3C"],
         # ----------------------------------------------------------------------------
         "colorful_1": ["#6d00db", "#9800cb", "#F2003C", "#ff4500", "#ff7f00", "#FE28A2", "#FFC0CB", "#DDA0DD", "#40E0D0", "#1a66f2", "#00f7fb", "#8fff88", "#E3FF00"],
+        # ----------------------------------------------------------------------------
+        "increasing_1": ["#FFFFFF", "#E6F7FF", "#A5E6F8", "#049CD4", "#11B5A3", "#04BC4C", "#74CC54", "#D9DD5C", "#FB922E", "#FC2224", "#E51C18", "#8B0000"],
+        # ----------------------------------------------------------------------------
     }
 
     if show_available:
oafuncs-0.0.98.46/oafuncs/oa_down/literature.py
@@ -0,0 +1,497 @@
+import os
+import re
+import time
+from pathlib import Path
+from urllib.parse import urljoin
+
+import pandas as pd
+import requests
+from rich import print
+from rich.progress import track
+from oafuncs.oa_down.user_agent import get_ua
+from oafuncs.oa_file import remove
+from oafuncs.oa_data import ensure_list
+
+__all__ = ["download5doi", "download5doi_via_unpaywall"]
+
+
+def _get_file_size(file_path, unit="KB"):
+    # Check whether the file exists
+    if not os.path.exists(file_path):
+        return "File does not exist"
+
+    # Get the file size in bytes
+    file_size = os.path.getsize(file_path)
+
+    # Unit conversion table
+    unit_dict = {
+        "PB": 1024**5,
+        "TB": 1024**4,
+        "GB": 1024**3,
+        "MB": 1024**2,
+        "KB": 1024,
+    }
+
+    # Validate the requested unit
+    if unit not in unit_dict:
+        return "Invalid unit; please choose one of PB, TB, GB, MB, KB"
+
+    # Convert the file size to the requested unit
+    converted_size = file_size / unit_dict[unit]
+
+    return converted_size
+
+
+class _Downloader:
+    """
+    Download a paper's PDF by DOI.
+    """
+
+    # Process-level cache: mirrors that passed the first probe, reused afterwards
+    _alive_mirrors_cache: list[str] | None = None
+
+    def __init__(self, doi, store_path, *, min_size_kb=50, timeout_html=15, timeout_pdf=30, sleep_secs=5, tries_each_url=3, debug=False):
+        self.url_list = [
+            r"https://sci-hub.se",
+            r"https://sci-hub.ren",
+            r"https://sci-hub.st",
+            r"https://sci-hub.ru",  # the most reliable mirror
+            # ------------------------------------- mirrors below are unverified
+            r"https://sci-hub.in",
+            r"https://sci-hub.hlgczx.com/",
+        ]
+        self.base_url = None
+        self.url = None
+        self.doi = doi
+        self.pdf_url = None
+        self.pdf_path = None
+        # requests expects header values to be str; make sure the UA is a string rather than bytes
+        self.headers = {"User-Agent": str(get_ua())}
+        # 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2.pdf
+        # self.fname = doi.replace(r'/', '_') + '.pdf'
+        self.fname = re.sub(r'[/<>:"?*|]', "_", doi) + ".pdf"
+        self.store_path = Path(store_path)
+        self.fpath = self.store_path / self.fname
+        self.wrong_record_file = self.store_path / "wrong_record.txt"
+        self.sleep = sleep_secs
+        self.cookies = None
+        self.check_size = max(1, int(min_size_kb))
+        self.url_index = 0
+        self.try_times_each_url_max = max(1, int(tries_each_url))
+        self.try_times = 0
+        self.timeout_html = max(5, int(timeout_html))
+        self.timeout_pdf = max(5, int(timeout_pdf))
+        self.debug = bool(debug)
+
+    # ---------------- Mirror availability probing ----------------
+    def _is_mirror_alive(self, base_url: str) -> bool:
+        """
+        Only check whether the mirror root is reachable (a 2xx/3xx response counts as alive).
+        Do not request a specific DOI, to avoid triggering anti-bot measures; this is a connectivity screen only.
+        """
+        try:
+            r = requests.get(base_url + "/", headers=self.headers, timeout=8, allow_redirects=True)
+            return 200 <= r.status_code < 400
+        except Exception:
+            return False
+
+    def _ensure_alive_mirrors(self):
+        # Reuse the process-level cache if it already exists
+        if _Downloader._alive_mirrors_cache is not None:
+            self.url_list = list(_Downloader._alive_mirrors_cache)
+            return
+
+        print("[bold cyan]Probing mirror connectivity (first run)...")
+        alive = []
+        for base in self.url_list:
+            ok = self._is_mirror_alive(base)
+            status = "OK" if ok else "DOWN"
+            print(f"  [{status}] {base}")
+            if ok:
+                alive.append(base)
+        if alive:
+            _Downloader._alive_mirrors_cache = alive
+            print(f"[bold cyan]Alive mirrors: {len(alive)}; pruned {len(self.url_list) - len(alive)}.")  # count against the pre-prune list
+            self.url_list = alive
+        else:
+            print("[bold yellow]No mirror passed the probe; keeping the original list for fallback attempts.")
+
+    def _extract_pdf_url_from_html(self, html: str) -> str | None:
+        """
+        Extract the PDF link from Sci-Hub page HTML as robustly as possible.
+
+        Handles several patterns:
+        - onclick="location.href='...pdf?download=true'"
+        - <iframe id="pdf" src="...pdf?...">
+        - <a ... href="...pdf?...">
+        - any other src/href that contains .pdf
+
+        Returns an absolute URL, or None if nothing is found.
+        """
+        text = html
+
+        # Try the common onclick redirect first
+        patterns = [
+            # onclick="location.href='...pdf?...'" or document.location
+            r"onclick\s*=\s*[\"']\s*(?:document\.)?location\.href\s*=\s*[\"']([^\"']+?\.pdf(?:[?#][^\"']*)?)[\"']",
+            # iframe id="pdf" src="...pdf?..."
+            r"<iframe[^>]+id\s*=\s*[\"']pdf[\"'][^>]+src\s*=\s*[\"']([^\"']+?\.pdf(?:[?#][^\"']*)?)[\"']",
+            # generic <a> tag href
+            r"<a[^>]+href\s*=\s*[\"']([^\"']+?\.pdf(?:[?#][^\"']*)?)[\"']",
+            # any other src/href attribute
+            r"(?:src|href)\s*=\s*[\"']([^\"']+?\.pdf(?:[?#][^\"']*)?)[\"']",
+        ]
+
+        for pat in patterns:
+            m = re.search(pat, text, flags=re.IGNORECASE | re.DOTALL)
+            if m:
+                got_url = m.group(1)
+                # Normalize to an absolute URL
+                if got_url.startswith("//"):
+                    return "https:" + got_url
+                if got_url.startswith("http://") or got_url.startswith("https://"):
+                    return got_url
+                # Treat everything else as a relative path
+                return urljoin(self.base_url + "/", got_url.lstrip("/"))
+
+        return None
+
+    def get_pdf_url(self):
+        print("[bold #E6E6FA]-" * 120)
+        print(f"DOI: {self.doi}")
+        print(f"Requesting: {self.url}...")
+        try:
+            # Use a modest timeout to avoid blocking for long
+            response = requests.get(self.url, headers=self.headers, timeout=self.timeout_html)
+            if response.status_code == 200:
+                self.cookies = response.cookies
+                text = response.text
+                # Strip escaping backslashes to improve the regex hit rate
+                text = text.replace("\\", "")
+
+                self.pdf_url = self._extract_pdf_url_from_html(text)
+                if self.pdf_url:
+                    if self.debug:
+                        print(f"Found PDF link: {self.pdf_url}")
+                    else:
+                        print(f"Found PDF link (masked): .../{Path(self.pdf_url).name}")
+                else:
+                    print(
+                        f"[bold #AFEEEE]The website {self.url_list[self.url_index]} does not expose a detectable PDF link (pattern mismatch)."
+                    )
+                    self.try_times = self.try_times_each_url_max + 1
+            else:
+                print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
+                print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} does not include the PDF file (HTTP error).")
+                self.try_times = self.try_times_each_url_max + 1
+        except Exception as e:
+            print(f"Failed to retrieve the webpage. Error: {e}")
+            self.try_times = self.try_times_each_url_max + 1
+
+    def url_iterate(self):
+        if self.url_index >= len(self.url_list):
+            return
+        url = self.url_list[self.url_index]
+        self.base_url = url
+        self.url = url + "/" + self.doi
+        self.get_pdf_url()
+        # for url in self.url_list:
+        #     self.url = url + self.doi
+        #     self.get_pdf_url()
+        #     if self.pdf_url:
+        #         break
+
+    def write_wrong_record(self):
+        # Read the existing records first; skip writing if the DOI is already recorded
+        if self.wrong_record_file.exists():
+            with open(self.wrong_record_file, "r") as f:
+                lines = f.readlines()
+            if self.doi in (line.strip() for line in lines):  # compare without trailing newlines
+                return
+        with open(self.wrong_record_file, "a") as f:
+            f.write(self.doi + "\n")
+
+    def download_pdf(self):
+        if self.fpath.exists():
+            fsize = _get_file_size(self.fpath, unit="KB")
+            if fsize < self.check_size:
+                # delete the wrong file
+                os.remove(self.fpath)
+                print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retried.")
+            else:
+                print("[bold #E6E6FA]-" * 120)
+                print(f"[bold purple]The PDF file {self.fpath} already exists.")
+                return
+        self.url_index = 0
+        already_downloaded = False
+        self.try_times = 0
+        while not already_downloaded:
+            self.url_iterate()
+            if not self.pdf_url:
+                self.url_index += 1
+                if self.url_index >= len(self.url_list):
+                    print("Failed to download the PDF file.")
+                    self.write_wrong_record()
+                    return
+                else:
+                    self.try_times = 0
+                    continue
+            else:
+                self.try_times += 1
+            if self.try_times > self.try_times_each_url_max:
+                self.url_index += 1
+                if self.url_index >= len(self.url_list):
+                    # print("Failed to download the PDF file.")
+                    self.write_wrong_record()
+                    return
+            print(f"Downloading: {self.fname}...")
+            try:
+                response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies, timeout=self.timeout_pdf)
+                if response.status_code == 200:
+                    with open(self.fpath, "wb") as f:
+                        f.write(response.content)
+                    fsize = _get_file_size(self.fpath, unit="KB")
+                    if fsize < self.check_size:
+                        # delete the wrong file
+                        os.remove(self.fpath)
+                        print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retried.")
+                    else:
+                        print(f"[bold green]Successfully downloaded {self.fpath}")
+                        already_downloaded = True
+                else:
+                    self.try_times = self.try_times_each_url_max + 1
+                    print(f"Failed to download the PDF file. Status code: {response.status_code}")
+                    print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} does not include the PDF file.")
+            except Exception as e:
+                print(f"Failed to download the PDF file. Error: {e}")
+            time.sleep(self.sleep)
+            if self.try_times >= self.try_times_each_url_max:
+                self.url_index += 1
+                if self.url_index >= len(self.url_list):
+                    print("\n[bold #CD5C5C]Failed to download the PDF file.")
+                    self.write_wrong_record()
+                    return
+                if self.try_times == self.try_times_each_url_max:
+                    print(f"Tried {self.try_times} times for {self.url_list[self.url_index-1]}.")
+                    print("Try another URL...")
+
+
+def _read_excel(file, col_name=r"DOI"):
+    df = pd.read_excel(file)
+    df_list = df[col_name].tolist()
+    # Drop NaN entries
+    df_list = [doi for doi in df_list if str(doi) != "nan"]
+    return df_list
+
+
+def _read_txt(file):
+    with open(file, "r") as f:
+        lines = f.readlines()
+    # Strip newlines and drop empty lines
+    lines = [line.strip() for line in lines if line.strip()]
+    return lines
+
+
+def download5doi(
+    store_path=None,
+    doi_list=None,
+    txt_file=None,
+    excel_file=None,
+    col_name=r"DOI",
+    *,
+    probe_mirrors: bool = True,
+    min_size_kb: int = 50,
+    timeout_html: int = 15,
+    timeout_pdf: int = 30,
+    tries_each_url: int = 3,
+    sleep_secs: int = 5,
+    force: bool = False,
+    debug: bool = False,
+):
+    """
+    Description:
+        Download PDF files by DOI.
+
+    Parameters:
+        store_path: str, The path to store the PDF files.
+        doi_list: list or str, The list of DOIs.
+        txt_file: str, The path of the txt file that contains the DOIs.
+        excel_file: str, The path of the excel file that contains the DOIs.
+        col_name: str, The column name of the DOIs in the excel file. Default is 'DOI'.
+
+    Returns:
+        None
+
+    Example:
+        download5doi(doi_list='10.3389/feart.2021.698876')
+        download5doi(store_path='I:\\Delete\\ref_pdf', doi_list='10.3389/feart.2021.698876')
+        download5doi(store_path='I:\\Delete\\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
+        download5doi(store_path='I:\\Delete\\ref_pdf', txt_file='I:\\Delete\\ref_pdf\\wrong_record.txt')
+        download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx')
+        download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx', col_name='DOI')
+    """
+    if not store_path:
+        store_path = Path.cwd()
+    else:
+        store_path = Path(str(store_path))
+    store_path.mkdir(parents=True, exist_ok=True)
+    store_path = str(store_path)
+
+    if doi_list:
+        doi_list = ensure_list(doi_list)
+    if txt_file:
+        doi_list = _read_txt(txt_file)
+    if excel_file:
+        doi_list = _read_excel(excel_file, col_name)
+    # Clean up and de-duplicate
+    doi_list = [str(x).strip() for x in doi_list if str(x).strip()]
+    doi_list = list(dict.fromkeys(doi_list))  # order-preserving de-duplication
+
+    # Only clear wrong_record.txt when this is not an append-style (forced) run
+    if not force:
+        remove(Path(store_path) / "wrong_record.txt")
+    print(f"Downloading {len(doi_list)} PDF files...")
+    for doi in track(doi_list, description="Downloading..."):
+        dl = _Downloader(
+            doi,
+            store_path,
+            min_size_kb=min_size_kb,
+            timeout_html=timeout_html,
+            timeout_pdf=timeout_pdf,
+            sleep_secs=sleep_secs,
+            tries_each_url=tries_each_url,
+            debug=debug,
+        )
+        # Optionally probe mirror availability first
+        if probe_mirrors:
+            dl._ensure_alive_mirrors()
+        dl.download_pdf()
+
+
+# ------------------------------- Compliant alternative (Open Access first) -------------------------------
+def _get_oa_pdf_url_from_unpaywall(doi: str, email: str | None) -> str | None:
+    """
+    Fetch an Open Access PDF link (if one exists) via Unpaywall.
+    An email is required (Unpaywall asks for an identifying address).
+    Returns the PDF URL or None.
+    """
+    if not email:
+        print("[bold yellow]Unpaywall requires an email parameter; please provide one to query OA links.")
+        return None
+    api = f"https://api.unpaywall.org/v2/{doi}?email={email}"
+    try:
+        r = requests.get(api, timeout=15)
+        if r.status_code != 200:
+            print(f"[bold yellow]Unpaywall query failed: HTTP {r.status_code}")
+            return None
+        data = r.json()
+        loc = data.get("best_oa_location") or {}
+        url_for_pdf = loc.get("url_for_pdf") or loc.get("url")
+        if url_for_pdf and url_for_pdf.lower().endswith(".pdf"):
+            return url_for_pdf
+        # Some OA links point at a landing page; try every recorded location for a PDF
+        for k in ("oa_locations", "oa_location"):
+            entries = data.get(k) or []
+            if isinstance(entries, dict):
+                entries = [entries]
+            for e in entries:
+                u = e.get("url_for_pdf") or e.get("url")
+                if u and ".pdf" in u.lower():
+                    return u
+    except Exception as e:
+        print(f"[bold yellow]Unpaywall query error: {e}")
+    return None
+
+
+def _download_pdf_from_url(url: str, dest_path: Path, headers: dict | None = None) -> bool:
+    """
+    Download a PDF from a valid URL and save it to dest_path.
+    Returns True on success, False otherwise.
+    """
+    headers = headers or {"User-Agent": str(get_ua()), "Accept": "application/pdf"}
+    try:
+        with requests.get(url, headers=headers, stream=True, timeout=30) as r:
+            if r.status_code != 200 or "application/pdf" not in r.headers.get("Content-Type", "").lower():
+                # The body may still be a PDF (some servers set the wrong header); save it anyway, but abort on HTTP errors
+                if r.status_code != 200:
+                    print(f"[bold yellow]Download failed: HTTP {r.status_code}")
+                    return False
+            dest_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(dest_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+            return True
+    except Exception as e:
+        print(f"[bold yellow]Download error: {e}")
+    return False
+
+
+def download5doi_via_unpaywall(
+    store_path=None,
+    doi_list=None,
+    txt_file=None,
+    excel_file=None,
+    col_name=r"DOI",
+    email: str | None = None,
+):
+    """
+    Prefer Unpaywall to locate Open Access (OA) PDFs and download them, avoiding non-compliant sites.
+
+    Parameters:
+        store_path: Directory to save PDFs into.
+        doi_list/txt_file/excel_file/col_name: Same as download5doi.
+        email: Email address used for the Unpaywall API (required; queries fail without it).
+    """
+    if not store_path:
+        store_path = Path.cwd()
+    else:
+        store_path = Path(str(store_path))
+    store_path.mkdir(parents=True, exist_ok=True)
+
+    if doi_list:
+        doi_list = ensure_list(doi_list)
+    if txt_file:
+        doi_list = _read_txt(txt_file)
+    if excel_file:
+        doi_list = _read_excel(excel_file, col_name)
+
+    if not doi_list:
+        print("[bold yellow]No DOI list provided.")
+        return
+
+    print(f"[bold cyan]Trying to download {len(doi_list)} OA PDFs via Unpaywall...")
+    ok, miss = 0, 0
+    for doi in track(doi_list, description="OA downloading..."):
+        # Sanitize the file name
+        fname = re.sub(r'[/<>:"?*|]', "_", str(doi)) + ".pdf"
+        dest = store_path / fname
+        if dest.exists() and _get_file_size(dest, unit="KB") > 10:
+            ok += 1
+            continue
+
+        pdf_url = _get_oa_pdf_url_from_unpaywall(str(doi), email=email)
+        if not pdf_url:
+            miss += 1
+            print(f"[bold yellow]No OA PDF found: {doi}")
+            continue
+
+        if _download_pdf_from_url(pdf_url, dest):
+            size_kb = _get_file_size(dest, unit="KB")
+            if isinstance(size_kb, (int, float)) and size_kb < 10:
+                dest.unlink(missing_ok=True)
+                miss += 1
+                print(f"[bold yellow]File too small, likely invalid; deleted: {dest}")
+            else:
+                ok += 1
+                print(f"[bold green]Downloaded: {dest}")
+        else:
+            miss += 1
+
+    print(f"[bold]Done. {ok} downloaded, {miss} not retrieved (possibly no OA version, or institutional access is required).")
+
+
+if __name__ == "__main__":
+    store_path = r"F:\AAA-Delete\DOI_Reference\5\pdf"
+    excel_file = r"F:\AAA-Delete\DOI_Reference\5\savedrecs.xls"
+    download5doi(store_path, excel_file=excel_file)
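A minimal usage sketch of the reworked API, assuming the import path oafuncs.oa_down.literature; the directories and email address are placeholders:

    from oafuncs.oa_down.literature import download5doi, download5doi_via_unpaywall

    # New keyword-only knobs added in this version (defaults as in the signature above).
    download5doi(
        store_path="./ref_pdf",                # placeholder directory
        doi_list="10.3389/feart.2021.698876",
        probe_mirrors=True,                    # probe mirrors once, reuse the alive list afterwards
        min_size_kb=50,                        # files smaller than this are treated as failed downloads
        force=False,                           # False clears wrong_record.txt before a fresh run
    )

    # Open-Access-first alternative; the Unpaywall API requires an identifying email.
    download5doi_via_unpaywall(
        store_path="./ref_pdf",
        txt_file="./ref_pdf/wrong_record.txt",  # retry the DOIs that failed above
        email="you@example.com",                # placeholder
    )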
oafuncs-0.0.98.46/oafuncs/oa_linux.py
@@ -0,0 +1,108 @@
+from rich import print
+import time
+import os
+
+__all__ = ["os_command", "get_queue_node", "query_queue", "running_jobs", "submit_job"]
+
+
+# Run a shell command and return its output
+def os_command(cmd):
+    import subprocess
+    print(f'🔍 Running command: {cmd}')
+    result = subprocess.run(
+        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+    )
+    # Print any error output (if present) to ease troubleshooting
+    if result.stderr:
+        print(f'❌ Error output: {result.stderr.strip()}')
+    # Check whether the command succeeded (non-zero means failure)
+    if result.returncode != 0:
+        print(f'❌ Command failed with exit code: {result.returncode}')
+        return None
+    return result.stdout
+
+# Return a dict of "queue name: idle node count"
+def get_queue_node():
+    import re
+    # Run sinfo | grep "idle" to list idle queues
+    cmd = 'sinfo | grep "idle"'
+    output = os_command(cmd)
+    if not output:  # Command failed or produced no output; return an empty dict
+        return {}
+
+    # Result dict: key = queue name, value = idle node count
+    queue_node_dict = {}
+    # Parse the command output line by line
+    for line in output.strip().split('\n'):
+        line = line.strip()
+        if not line:  # Skip empty lines
+            continue
+
+        # Regex: capture only the queue name (group 1) and node count (group 2)
+        # The trailing .* ignores the node list without affecting the match
+        pattern = r"^(\S+)\s+\S+\s+\S+\s+(\d+)\s+idle\s+.*$"
+        match = re.match(pattern, line)
+
+        if match:
+            queue_name = match.group(1)  # queue name becomes the dict key
+            node_count = int(match.group(2))  # node count (as int) becomes the value
+            queue_node_dict[queue_name] = node_count
+
+    return queue_node_dict
+
+def query_queue(need_node=1, queue_list=['dcu', 'bigmem', 'cpu_parallel', 'cpu_single']):
+    queue_dict = get_queue_node()
+    hs = None
+    for my_queue in queue_list:
+        if my_queue in queue_dict and queue_dict[my_queue] >= need_node:
+            # slurm_file = f'../run.slurm.{my_queue}'
+            hs = my_queue
+            break
+    return hs
+
+def running_jobs():
+    # Check via the scheduler whether jobs are still running
+    # status = os.popen('qstat').read()
+    status = os.popen('squeue').read()
+    Jobs = status.split('\n')[1:]
+    ids = [job.split()[0] for job in Jobs if job != '']
+    return ids
+
+def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need_node=1, queue_tmp='<queue_name>', queue_list=['dcu', 'bigmem', 'cpu_parallel', 'cpu_single'], max_job=38):
+    from .oa_file import replace_content
+    import datetime
+    os.chdir(working_dir)
+    print(f'Changed working directory to: {working_dir}')
+    while True:
+        running_job = running_jobs()
+        if not running_job or len(running_job) < max_job:
+            queue = query_queue(need_node=need_node, queue_list=queue_list)
+            if queue:
+                replace_content(script_tmp, {f'{queue_tmp}': f"{queue}"}, False, f'{working_dir}', script_run)
+                print(f'Found compute resources; submitting job to queue: {queue}')
+                print(f'Time: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
+                content_sub = os_command(f"sbatch {script_run}")
+                # Avoid using 'in' on None (TypeError): os_command returns None on failure
+                if not content_sub:
+                    print('The submit command returned no output or an error; retrying in 30 seconds!')
+                    time.sleep(30)
+                else:
+                    content_sub_lower = content_sub.lower()
+                    if 'error' in content_sub_lower or 'failed' in content_sub_lower:
+                        print('Error detected while submitting (output contains error/failed); retrying in 30 seconds!')
+                        print(f'Command output: {content_sub.strip()}')
+                        time.sleep(30)
+                    else:
+                        print(f'Job submitted successfully: {content_sub.strip()}')
+                        break
+            else:
+                print('Not enough compute resources; retrying in 30 seconds!')
+                time.sleep(30)
+        else:
+            print(f'Current number of jobs in the system: {len(running_job)}; retrying in 60 seconds!')
+            time.sleep(60)
+        print('Waiting 10 seconds before checking job status again!')
+        time.sleep(10)
+
+if __name__ == "__main__":
+    pass
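A usage sketch for the expanded module, assuming a Slurm cluster where sinfo/squeue/sbatch are available; the working directory, queue names, and example output are placeholders:

    from oafuncs.oa_linux import get_queue_node, query_queue, submit_job

    print(get_queue_node())          # e.g. {"dcu": 12, "bigmem": 3} — idle nodes per partition
    print(query_queue(need_node=2))  # first partition in the preference list with >= 2 idle nodes, else None

    # Fill <queue_name> in run.slurm with the chosen partition and sbatch it,
    # waiting whenever the number of queued jobs reaches max_job.
    submit_job("/path/to/case", script_tmp="run.slurm", script_run="run.slurm", need_node=2, max_job=38)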
{oafuncs-0.0.98.45 → oafuncs-0.0.98.46/oafuncs.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oafuncs
-Version: 0.0.98.45
+Version: 0.0.98.46
 Summary: Oceanic and Atmospheric Functions
 Home-page: https://github.com/Industry-Pays/OAFuncs
 Author: Kun Liu
{oafuncs-0.0.98.45 → oafuncs-0.0.98.46}/setup.py
@@ -18,7 +18,7 @@ URL = "https://github.com/Industry-Pays/OAFuncs"
 EMAIL = "liukun0312@stu.ouc.edu.cn"
 AUTHOR = "Kun Liu"
 REQUIRES_PYTHON = ">=3.10.0"  # 2025/03/13
-VERSION = "0.0.98.45"
+VERSION = "0.0.98.46"
 
 # What packages are required for this module to be executed?
 REQUIRED = [
oafuncs-0.0.98.45/oafuncs/oa_down/literature.py
@@ -1,273 +0,0 @@
-import os
-import re
-import time
-from pathlib import Path
-
-import pandas as pd
-import requests
-from rich import print
-from rich.progress import track
-from oafuncs.oa_down.user_agent import get_ua
-from oafuncs.oa_file import remove
-from oafuncs.oa_data import ensure_list
-
-__all__ = ["download5doi"]
-
-
-def _get_file_size(file_path, unit="KB"):
-    # Check whether the file exists
-    if not os.path.exists(file_path):
-        return "File does not exist"
-
-    # Get the file size in bytes
-    file_size = os.path.getsize(file_path)
-
-    # Unit conversion table
-    unit_dict = {
-        "PB": 1024**5,
-        "TB": 1024**4,
-        "GB": 1024**3,
-        "MB": 1024**2,
-        "KB": 1024,
-    }
-
-    # Validate the requested unit
-    if unit not in unit_dict:
-        return "Invalid unit; please choose one of PB, TB, GB, MB, KB"
-
-    # Convert the file size to the requested unit
-    converted_size = file_size / unit_dict[unit]
-
-    return converted_size
-
-
-class _Downloader:
-    """
-    Download a paper's PDF by DOI.
-    """
-
-    def __init__(self, doi, store_path):
-        self.url_list = [
-            r"https://sci-hub.se",
-            r"https://sci-hub.ren",
-            r"https://sci-hub.st",
-            r"https://sci-hub.ru",  # the most reliable mirror
-            # ------------------------------------- mirrors below are unverified
-            r"https://sci-hub.wf",
-            r"https://sci-hub.yt",
-            r"https://sci-hub.ee",
-            r"https://sci-hub.cat",
-            r"https://sci-hub.in",
-            r"https://www.pismin.com",
-            r"https://sci-hub.vkif.top",
-            r"https://www.bothonce.com",
-            r"https://sci-hub.et-fine.com",
-            r"https://sci-hub.hkvisa.net",
-            # r"https://sci-hub.3800808.com",  # this one only allows saving manually
-            r"https://sci-hub.zidianzhan.net",
-            r"https://sci-hub.usualwant.com",
-        ]
-        self.base_url = None
-        self.url = None
-        self.doi = doi
-        self.pdf_url = None
-        self.pdf_path = None
-        self.headers = {"User-Agent": get_ua().encode("utf-8")}
-        # 10.1175/1520-0493(1997)125<0742:IODAOO>2.0.CO;2.pdf
-        # self.fname = doi.replace(r'/', '_') + '.pdf'
-        self.fname = re.sub(r'[/<>:"?*|]', "_", doi) + ".pdf"
-        self.store_path = Path(store_path)
-        self.fpath = self.store_path / self.fname
-        self.wrong_record_file = self.store_path / "wrong_record.txt"
-        self.sleep = 5
-        self.cookies = None
-        self.check_size = 50
-        self.url_index = 0
-        self.try_times_each_url_max = 3
-        self.try_times = 0
-
-    def get_pdf_url(self):
-        print("[bold #E6E6FA]-" * 120)
-        print(f"DOI: {self.doi}")
-        print(f"Requesting: {self.url}...")
-        try:
-            response = requests.get(self.url, headers=self.headers)
-            if response.status_code == 200:
-                self.cookies = response.cookies
-                text = response.text.replace("\\", "")
-                # text = text.replace(' ', '')  # It is important to remove the space
-                # print(text)
-                pattern = re.compile(r'onclick = "location.href=\'(.*?\.pdf\?download=true)\'"')
-                match = pattern.search(text)
-                if match:
-                    got_url = match.group(1)
-                    if r"http" not in got_url:
-                        if got_url[:2] == "//":
-                            self.pdf_url = "https:" + got_url
-                        else:
-                            self.pdf_url = self.base_url + got_url
-                    else:
-                        self.pdf_url = got_url
-                    print(f"URL: {self.pdf_url}")
-                else:
-                    print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} does not include the PDF file.")
-                    self.try_times = self.try_times_each_url_max + 1
-            else:
-                print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
-                print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} does not include the PDF file.")
-                self.try_times = self.try_times_each_url_max + 1
-        except Exception as e:
-            print(f"Failed to retrieve the webpage. Error: {e}")
-            self.try_times = self.try_times_each_url_max + 1
-
-    def url_iterate(self):
-        if self.url_index >= len(self.url_list):
-            return
-        url = self.url_list[self.url_index]
-        self.base_url = url
-        self.url = url + "/" + self.doi
-        self.get_pdf_url()
-        # for url in self.url_list:
-        #     self.url = url + self.doi
-        #     self.get_pdf_url()
-        #     if self.pdf_url:
-        #         break
-
-    def write_wrong_record(self):
-        # Read the existing records first; skip writing if the DOI is already recorded
-        if self.wrong_record_file.exists():
-            with open(self.wrong_record_file, "r") as f:
-                lines = f.readlines()
-            if self.doi in lines:
-                return
-        with open(self.wrong_record_file, "a") as f:
-            f.write(self.doi + "\n")
-
-    def download_pdf(self):
-        if self.fpath.exists():
-            fsize = _get_file_size(self.fpath, unit="KB")
-            if fsize < self.check_size:
-                # delete the wrong file
-                os.remove(self.fpath)
-                print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retried.")
-            else:
-                print("[bold #E6E6FA]-" * 120)
-                print(f"[bold purple]The PDF file {self.fpath} already exists.")
-                return
-        self.url_index = 0
-        already_downloaded = False
-        self.try_times = 0
-        while not already_downloaded:
-            self.url_iterate()
-            if not self.pdf_url:
-                self.url_index += 1
-                if self.url_index >= len(self.url_list):
-                    print("Failed to download the PDF file.")
-                    self.write_wrong_record()
-                    return
-                else:
-                    self.try_times = 0
-                    continue
-            else:
-                self.try_times += 1
-            if self.try_times > self.try_times_each_url_max:
-                self.url_index += 1
-                if self.url_index >= len(self.url_list):
-                    # print("Failed to download the PDF file.")
-                    self.write_wrong_record()
-                    return
-            print(f"Downloading: {self.fname}...")
-            try:
-                response = requests.get(self.pdf_url, headers=self.headers, cookies=self.cookies)
-                if response.status_code == 200:
-                    with open(self.fpath, "wb") as f:
-                        f.write(response.content)
-                    fsize = _get_file_size(self.fpath, unit="KB")
-                    if fsize < self.check_size:
-                        # delete the wrong file
-                        os.remove(self.fpath)
-                        print(f"[bold yellow]The PDF file {self.fpath} is only {fsize:.2f} KB. It will be deleted and retried.")
-                    else:
-                        print(f"[bold green]Successfully downloaded {self.fpath}")
-                        already_downloaded = True
-                else:
-                    self.try_times = self.try_times_each_url_max + 1
-                    print(f"Failed to download the PDF file. Status code: {response.status_code}")
-                    print(f"[bold #AFEEEE]The website {self.url_list[self.url_index]} does not include the PDF file.")
-            except Exception as e:
-                print(f"Failed to download the PDF file. Error: {e}")
-            time.sleep(self.sleep)
-            if self.try_times >= self.try_times_each_url_max:
-                self.url_index += 1
-                if self.url_index >= len(self.url_list):
-                    print("\n[bold #CD5C5C]Failed to download the PDF file.")
-                    self.write_wrong_record()
-                    return
-                if self.try_times == self.try_times_each_url_max:
-                    print(f"Tried {self.try_times} times for {self.url_list[self.url_index-1]}.")
-                    print("Try another URL...")
-
-
-def _read_excel(file, col_name=r"DOI"):
-    df = pd.read_excel(file)
-    df_list = df[col_name].tolist()
-    # Drop NaN entries
-    df_list = [doi for doi in df_list if str(doi) != "nan"]
-    return df_list
-
-
-def _read_txt(file):
-    with open(file, "r") as f:
-        lines = f.readlines()
-    # Strip newlines and drop empty lines
-    lines = [line.strip() for line in lines if line.strip()]
-    return lines
-
-
-def download5doi(store_path=None, doi_list=None, txt_file=None, excel_file=None, col_name=r"DOI"):
-    """
-    Description:
-        Download PDF files by DOI.
-
-    Parameters:
-        store_path: str, The path to store the PDF files.
-        doi_list: list or str, The list of DOIs.
-        txt_file: str, The path of the txt file that contains the DOIs.
-        excel_file: str, The path of the excel file that contains the DOIs.
-        col_name: str, The column name of the DOIs in the excel file. Default is 'DOI'.
-
-    Returns:
-        None
-
-    Example:
-        download5doi(doi_list='10.3389/feart.2021.698876')
-        download5doi(store_path='I:\\Delete\\ref_pdf', doi_list='10.3389/feart.2021.698876')
-        download5doi(store_path='I:\\Delete\\ref_pdf', doi_list=['10.3389/feart.2021.698876', '10.3389/feart.2021.698876'])
-        download5doi(store_path='I:\\Delete\\ref_pdf', txt_file='I:\\Delete\\ref_pdf\\wrong_record.txt')
-        download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx')
-        download5doi(store_path='I:\\Delete\\ref_pdf', excel_file='I:\\Delete\\ref_pdf\\wrong_record.xlsx', col_name='DOI')
-    """
-    if not store_path:
-        store_path = Path.cwd()
-    else:
-        store_path = Path(str(store_path))
-    store_path.mkdir(parents=True, exist_ok=True)
-    store_path = str(store_path)
-
-    if doi_list:
-        doi_list = ensure_list(doi_list)
-    if txt_file:
-        doi_list = _read_txt(txt_file)
-    if excel_file:
-        doi_list = _read_excel(excel_file, col_name)
-    remove(Path(store_path) / "wrong_record.txt")
-    print(f"Downloading {len(doi_list)} PDF files...")
-    for doi in track(doi_list, description="Downloading..."):
-        download = _Downloader(doi, store_path)
-        download.download_pdf()
-
-
-if __name__ == "__main__":
-    store_path = r"F:\AAA-Delete\DOI_Reference\5\pdf"
-    excel_file = r"F:\AAA-Delete\DOI_Reference\5\savedrecs.xls"
-    download5doi(store_path, excel_file=excel_file)
@@ -1,53 +0,0 @@
1
- from rich import print
2
-
3
-
4
- __all__ = ["os_command", "get_queue_node"]
5
-
6
-
7
- # 负责执行命令并返回输出
8
- def os_command(cmd):
9
- import subprocess
10
- print(f'🔍 执行命令: {cmd}')
11
- result = subprocess.run(
12
- cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
13
- )
14
- # 打印错误信息(若有,方便排查问题)
15
- if result.stderr:
16
- print(f'❌ 错误输出: {result.stderr.strip()}')
17
- # 检查命令是否执行成功(非0为失败)
18
- if result.returncode != 0:
19
- print(f'❌ 命令执行失败,退出码: {result.returncode}')
20
- return None
21
- return result.stdout
22
-
23
- # 返回“队列名:节点数”的字典
24
- def get_queue_node():
25
- import re
26
- # 执行 sinfo | grep "idle" 获取空闲队列数据
27
- cmd = 'sinfo | grep "idle"'
28
- output = os_command(cmd)
29
- if not output: # 命令执行失败或无输出,返回空字典
30
- return {}
31
-
32
- # 初始化结果字典:键=队列名,值=节点数
33
- queue_node_dict = {}
34
- # 按行解析命令输出
35
- for line in output.strip().split('\n'):
36
- line = line.strip()
37
- if not line: # 跳过空行
38
- continue
39
-
40
- # 正则匹配:仅捕获“队列名”(第1组)和“节点数”(第2组)
41
- # 末尾用 .* 忽略节点列表,不影响匹配
42
- pattern = r"^(\S+)\s+\S+\s+\S+\s+(\d+)\s+idle\s+.*$"
43
- match = re.match(pattern, line)
44
-
45
- if match:
46
- queue_name = match.group(1) # 提取队列名作为字典的键
47
- node_count = int(match.group(2))# 提取节点数作为字典的值(转为整数)
48
- queue_node_dict[queue_name] = node_count # 存入字典
49
-
50
- return queue_node_dict
51
-
52
- if __name__ == "__main__":
53
- pass