oafuncs 0.0.98.46__tar.gz → 0.0.98.49__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oafuncs-0.0.98.46/oafuncs.egg-info → oafuncs-0.0.98.49}/PKG-INFO +1 -1
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/literature.py +1 -124
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_linux.py +32 -5
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49/oafuncs.egg-info}/PKG-INFO +1 -1
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/setup.py +1 -1
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/LICENSE.txt +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/MANIFEST.in +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/README.md +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/__init__.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_data/hycom.png +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_data/oafuncs.png +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/cprogressbar.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/data_interp.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/email.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/netcdf_merge.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/netcdf_modify.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/netcdf_write.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/parallel.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/parallel_bak.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/plot_dataset.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/replace_file_content.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_cmap.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_data.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_date.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/User_Agent-list.txt +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/__init__.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/hycom_3hourly.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/idm.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/read_proxy.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/test_ua.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/user_agent.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_draw.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_file.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_geo.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_help.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/__init__.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/roms/__init__.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/roms/test.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/wrf/__init__.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/wrf/little_r.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_nc.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_python.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/__init__.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/meteorological.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/ocean.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/scientific.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_tool.py +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/SOURCES.txt +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/dependency_links.txt +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/requires.txt +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/top_level.txt +0 -0
- {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/setup.cfg +0 -0
|
@@ -12,7 +12,7 @@ from oafuncs.oa_down.user_agent import get_ua
|
|
|
12
12
|
from oafuncs.oa_file import remove
|
|
13
13
|
from oafuncs.oa_data import ensure_list
|
|
14
14
|
|
|
15
|
-
__all__ = ["download5doi", "download5doi_via_unpaywall"]
|
|
15
|
+
__all__ = ["download5doi"]
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def _get_file_size(file_path, unit="KB"):
|
|
@@ -368,129 +368,6 @@ def download5doi(
|
|
|
368
368
|
dl.download_pdf()
|
|
369
369
|
|
|
370
370
|
|
|
371
|
-
# ------------------------------- 合规替代方案(Open Access 优先) -------------------------------
|
|
372
|
-
def _get_oa_pdf_url_from_unpaywall(doi: str, email: str | None) -> str | None:
|
|
373
|
-
"""
|
|
374
|
-
通过 Unpaywall 获取可开放访问的 PDF 链接(若存在)。
|
|
375
|
-
需要提供 email(Unpaywall 要求标识邮件)。
|
|
376
|
-
返回 PDF URL 或 None。
|
|
377
|
-
"""
|
|
378
|
-
if not email:
|
|
379
|
-
print("[bold yellow]Unpaywall 需要 email 参数;请提供 email 以查询 OA 链接。")
|
|
380
|
-
return None
|
|
381
|
-
api = f"https://api.unpaywall.org/v2/{doi}?email={email}"
|
|
382
|
-
try:
|
|
383
|
-
r = requests.get(api, timeout=15)
|
|
384
|
-
if r.status_code != 200:
|
|
385
|
-
print(f"[bold yellow]Unpaywall 查询失败: HTTP {r.status_code}")
|
|
386
|
-
return None
|
|
387
|
-
data = r.json()
|
|
388
|
-
loc = data.get("best_oa_location") or {}
|
|
389
|
-
url_for_pdf = loc.get("url_for_pdf") or loc.get("url")
|
|
390
|
-
if url_for_pdf and url_for_pdf.lower().endswith(".pdf"):
|
|
391
|
-
return url_for_pdf
|
|
392
|
-
# 有些 OA 链接是落在 landing page,再尝试从记录的所有位置挑选 pdf
|
|
393
|
-
for k in ("oa_locations", "oa_location"):
|
|
394
|
-
entries = data.get(k) or []
|
|
395
|
-
if isinstance(entries, dict):
|
|
396
|
-
entries = [entries]
|
|
397
|
-
for e in entries:
|
|
398
|
-
u = e.get("url_for_pdf") or e.get("url")
|
|
399
|
-
if u and ".pdf" in u.lower():
|
|
400
|
-
return u
|
|
401
|
-
except Exception as e:
|
|
402
|
-
print(f"[bold yellow]Unpaywall 查询异常: {e}")
|
|
403
|
-
return None
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
def _download_pdf_from_url(url: str, dest_path: Path, headers: dict | None = None) -> bool:
|
|
407
|
-
"""
|
|
408
|
-
给定合法的 PDF 下载 URL,下载保存到 dest_path。
|
|
409
|
-
返回 True/False 表示是否成功。
|
|
410
|
-
"""
|
|
411
|
-
headers = headers or {"User-Agent": str(get_ua()), "Accept": "application/pdf"}
|
|
412
|
-
try:
|
|
413
|
-
with requests.get(url, headers=headers, stream=True, timeout=30) as r:
|
|
414
|
-
if r.status_code != 200 or "application/pdf" not in r.headers.get("Content-Type", "").lower():
|
|
415
|
-
# 仍可能是 PDF(某些服务器未正确设置头),尝试保存但标注提示
|
|
416
|
-
if r.status_code != 200:
|
|
417
|
-
print(f"[bold yellow]下载失败: HTTP {r.status_code}")
|
|
418
|
-
return False
|
|
419
|
-
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
|
420
|
-
with open(dest_path, "wb") as f:
|
|
421
|
-
for chunk in r.iter_content(chunk_size=8192):
|
|
422
|
-
if chunk:
|
|
423
|
-
f.write(chunk)
|
|
424
|
-
return True
|
|
425
|
-
except Exception as e:
|
|
426
|
-
print(f"[bold yellow]下载异常: {e}")
|
|
427
|
-
return False
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
def download5doi_via_unpaywall(
|
|
431
|
-
store_path=None,
|
|
432
|
-
doi_list=None,
|
|
433
|
-
txt_file=None,
|
|
434
|
-
excel_file=None,
|
|
435
|
-
col_name=r"DOI",
|
|
436
|
-
email: str | None = None,
|
|
437
|
-
):
|
|
438
|
-
"""
|
|
439
|
-
优先使用 Unpaywall 获取开放访问(OA)的 PDF 并下载,避免非合规站点。
|
|
440
|
-
|
|
441
|
-
参数:
|
|
442
|
-
store_path: 保存目录
|
|
443
|
-
doi_list/txt_file/excel_file/col_name: 同 download5doi
|
|
444
|
-
email: 用于访问 Unpaywall API 的邮箱(必填,否则无法查询)
|
|
445
|
-
"""
|
|
446
|
-
if not store_path:
|
|
447
|
-
store_path = Path.cwd()
|
|
448
|
-
else:
|
|
449
|
-
store_path = Path(str(store_path))
|
|
450
|
-
store_path.mkdir(parents=True, exist_ok=True)
|
|
451
|
-
|
|
452
|
-
if doi_list:
|
|
453
|
-
doi_list = ensure_list(doi_list)
|
|
454
|
-
if txt_file:
|
|
455
|
-
doi_list = _read_txt(txt_file)
|
|
456
|
-
if excel_file:
|
|
457
|
-
doi_list = _read_excel(excel_file, col_name)
|
|
458
|
-
|
|
459
|
-
if not doi_list:
|
|
460
|
-
print("[bold yellow]未提供 DOI 列表。")
|
|
461
|
-
return
|
|
462
|
-
|
|
463
|
-
print(f"[bold cyan]通过 Unpaywall 尝试下载 {len(doi_list)} 篇 OA PDF...")
|
|
464
|
-
ok, miss = 0, 0
|
|
465
|
-
for doi in track(doi_list, description="OA downloading..."):
|
|
466
|
-
# 规范化文件名
|
|
467
|
-
fname = re.sub(r'[/<>:"?*|]', "_", str(doi)) + ".pdf"
|
|
468
|
-
dest = store_path / fname
|
|
469
|
-
if dest.exists() and _get_file_size(dest, unit="KB") > 10:
|
|
470
|
-
ok += 1
|
|
471
|
-
continue
|
|
472
|
-
|
|
473
|
-
pdf_url = _get_oa_pdf_url_from_unpaywall(str(doi), email=email)
|
|
474
|
-
if not pdf_url:
|
|
475
|
-
miss += 1
|
|
476
|
-
print(f"[bold yellow]未找到 OA PDF: {doi}")
|
|
477
|
-
continue
|
|
478
|
-
|
|
479
|
-
if _download_pdf_from_url(pdf_url, dest):
|
|
480
|
-
size_kb = _get_file_size(dest, unit="KB")
|
|
481
|
-
if isinstance(size_kb, (int, float)) and size_kb < 10:
|
|
482
|
-
dest.unlink(missing_ok=True)
|
|
483
|
-
miss += 1
|
|
484
|
-
print(f"[bold yellow]文件过小,疑似异常,已删除: {dest}")
|
|
485
|
-
else:
|
|
486
|
-
ok += 1
|
|
487
|
-
print(f"[bold green]已下载: {dest}")
|
|
488
|
-
else:
|
|
489
|
-
miss += 1
|
|
490
|
-
|
|
491
|
-
print(f"[bold]完成。成功 {ok} 篇,未获取 {miss} 篇(可能无 OA 版本或需机构访问)。")
|
|
492
|
-
|
|
493
|
-
|
|
494
371
|
if __name__ == "__main__":
|
|
495
372
|
store_path = r"F:\AAA-Delete\DOI_Reference\5\pdf"
|
|
496
373
|
excel_file = r"F:\AAA-Delete\DOI_Reference\5\savedrecs.xls"
|
|
@@ -54,10 +54,15 @@ def query_queue(need_node=1, queue_list =['dcu','bigmem','cpu_parallel','cpu_sin
|
|
|
54
54
|
queue_dict = get_queue_node()
|
|
55
55
|
hs = None
|
|
56
56
|
for my_queue in queue_list:
|
|
57
|
-
if my_queue in queue_dict and queue_dict[my_queue] >= need_node:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
if my_queue == 'cpu_parallel':
|
|
58
|
+
for mq in ['cpu_parallel','cpu_parallel*']:
|
|
59
|
+
if mq in queue_dict and queue_dict[mq] >= need_node:
|
|
60
|
+
hs = 'cpu_parallel'
|
|
61
|
+
break
|
|
62
|
+
else:
|
|
63
|
+
if my_queue in queue_dict and queue_dict[my_queue] >= need_node:
|
|
64
|
+
hs = my_queue
|
|
65
|
+
break
|
|
61
66
|
return hs
|
|
62
67
|
|
|
63
68
|
def running_jobs():
|
|
@@ -68,11 +73,18 @@ def running_jobs():
|
|
|
68
73
|
ids = [job.split()[0] for job in Jobs if job != '']
|
|
69
74
|
return ids
|
|
70
75
|
|
|
71
|
-
def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need_node=1, queue_tmp='<queue_name>', queue_list=['dcu', 'bigmem', 'cpu_parallel', 'cpu_single'], max_job=38):
|
|
76
|
+
def submit_job(working_dir=None, script_tmp='run.slurm', script_run='run.slurm', need_node=1, queue_tmp='<queue_name>', queue_list=['dcu', 'bigmem', 'cpu_parallel', 'cpu_single'], max_job=38, wait=False):
|
|
77
|
+
'''提交任务到集群,并返回任务ID'''
|
|
72
78
|
from .oa_file import replace_content
|
|
73
79
|
import datetime
|
|
80
|
+
if working_dir is None:
|
|
81
|
+
working_dir = os.getcwd()
|
|
74
82
|
os.chdir(working_dir)
|
|
75
83
|
print(f'切换工作目录到: {working_dir}')
|
|
84
|
+
|
|
85
|
+
if need_node > 1 and 'cpu_single' in queue_list:
|
|
86
|
+
queue_list.remove('cpu_single')
|
|
87
|
+
|
|
76
88
|
while True:
|
|
77
89
|
running_job = running_jobs()
|
|
78
90
|
if not running_job or len(running_job) < max_job:
|
|
@@ -94,6 +106,7 @@ def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need
|
|
|
94
106
|
time.sleep(30)
|
|
95
107
|
else:
|
|
96
108
|
print(f'提交任务成功,{content_sub.strip()}')
|
|
109
|
+
job_id = content_sub.strip().split()[-1]
|
|
97
110
|
break
|
|
98
111
|
else:
|
|
99
112
|
print('没有足够的计算资源,等待30秒后重试!')
|
|
@@ -103,6 +116,20 @@ def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need
|
|
|
103
116
|
time.sleep(60)
|
|
104
117
|
print(f'等待10秒后,继续检查任务状态!')
|
|
105
118
|
time.sleep(10)
|
|
119
|
+
|
|
120
|
+
if wait:
|
|
121
|
+
while True:
|
|
122
|
+
if job_id in running_jobs():
|
|
123
|
+
print(f'Time: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
|
|
124
|
+
print(f'任务{job_id}正在队列中...')
|
|
125
|
+
time.sleep(60)
|
|
126
|
+
else:
|
|
127
|
+
print(f'任务{job_id}已完成!')
|
|
128
|
+
break
|
|
129
|
+
else:
|
|
130
|
+
print(f'任务{job_id}已提交,不等待其完成,继续执行后续操作!')
|
|
131
|
+
|
|
132
|
+
return job_id
|
|
106
133
|
|
|
107
134
|
if __name__ == "__main__":
|
|
108
135
|
pass
|
|
@@ -18,7 +18,7 @@ URL = "https://github.com/Industry-Pays/OAFuncs"
|
|
|
18
18
|
EMAIL = "liukun0312@stu.ouc.edu.cn"
|
|
19
19
|
AUTHOR = "Kun Liu"
|
|
20
20
|
REQUIRES_PYTHON = ">=3.10.0" # 2025/03/13
|
|
21
|
-
VERSION = "0.0.98.46"
|
|
21
|
+
VERSION = "0.0.98.49"
|
|
22
22
|
|
|
23
23
|
# What packages are required for this module to be executed?
|
|
24
24
|
REQUIRED = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|