oafuncs 0.0.98.46__tar.gz → 0.0.98.49__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {oafuncs-0.0.98.46/oafuncs.egg-info → oafuncs-0.0.98.49}/PKG-INFO +1 -1
  2. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/literature.py +1 -124
  3. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_linux.py +32 -5
  4. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49/oafuncs.egg-info}/PKG-INFO +1 -1
  5. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/setup.py +1 -1
  6. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/LICENSE.txt +0 -0
  7. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/MANIFEST.in +0 -0
  8. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/README.md +0 -0
  9. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/__init__.py +0 -0
  10. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_data/hycom.png +0 -0
  11. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_data/oafuncs.png +0 -0
  12. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/cprogressbar.py +0 -0
  13. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/data_interp.py +0 -0
  14. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/email.py +0 -0
  15. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/netcdf_merge.py +0 -0
  16. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/netcdf_modify.py +0 -0
  17. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/netcdf_write.py +0 -0
  18. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/parallel.py +0 -0
  19. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/parallel_bak.py +0 -0
  20. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/plot_dataset.py +0 -0
  21. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/_script/replace_file_content.py +0 -0
  22. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_cmap.py +0 -0
  23. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_data.py +0 -0
  24. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_date.py +0 -0
  25. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/User_Agent-list.txt +0 -0
  26. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/__init__.py +0 -0
  27. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/hycom_3hourly.py +0 -0
  28. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/idm.py +0 -0
  29. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/read_proxy.py +0 -0
  30. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/test_ua.py +0 -0
  31. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_down/user_agent.py +0 -0
  32. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_draw.py +0 -0
  33. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_file.py +0 -0
  34. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_geo.py +0 -0
  35. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_help.py +0 -0
  36. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/__init__.py +0 -0
  37. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/roms/__init__.py +0 -0
  38. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/roms/test.py +0 -0
  39. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/wrf/__init__.py +0 -0
  40. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_model/wrf/little_r.py +0 -0
  41. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_nc.py +0 -0
  42. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_python.py +0 -0
  43. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/__init__.py +0 -0
  44. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/meteorological.py +0 -0
  45. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/ocean.py +0 -0
  46. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_sign/scientific.py +0 -0
  47. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs/oa_tool.py +0 -0
  48. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/SOURCES.txt +0 -0
  49. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/dependency_links.txt +0 -0
  50. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/requires.txt +0 -0
  51. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/oafuncs.egg-info/top_level.txt +0 -0
  52. {oafuncs-0.0.98.46 → oafuncs-0.0.98.49}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oafuncs
3
- Version: 0.0.98.46
3
+ Version: 0.0.98.49
4
4
  Summary: Oceanic and Atmospheric Functions
5
5
  Home-page: https://github.com/Industry-Pays/OAFuncs
6
6
  Author: Kun Liu
@@ -12,7 +12,7 @@ from oafuncs.oa_down.user_agent import get_ua
12
12
  from oafuncs.oa_file import remove
13
13
  from oafuncs.oa_data import ensure_list
14
14
 
15
- __all__ = ["download5doi", "download5doi_via_unpaywall"]
15
+ __all__ = ["download5doi"]
16
16
 
17
17
 
18
18
  def _get_file_size(file_path, unit="KB"):
@@ -368,129 +368,6 @@ def download5doi(
368
368
  dl.download_pdf()
369
369
 
370
370
 
371
- # ------------------------------- 合规替代方案(Open Access 优先) -------------------------------
372
- def _get_oa_pdf_url_from_unpaywall(doi: str, email: str | None) -> str | None:
373
- """
374
- 通过 Unpaywall 获取可开放访问的 PDF 链接(若存在)。
375
- 需要提供 email(Unpaywall 要求标识邮件)。
376
- 返回 PDF URL 或 None。
377
- """
378
- if not email:
379
- print("[bold yellow]Unpaywall 需要 email 参数;请提供 email 以查询 OA 链接。")
380
- return None
381
- api = f"https://api.unpaywall.org/v2/{doi}?email={email}"
382
- try:
383
- r = requests.get(api, timeout=15)
384
- if r.status_code != 200:
385
- print(f"[bold yellow]Unpaywall 查询失败: HTTP {r.status_code}")
386
- return None
387
- data = r.json()
388
- loc = data.get("best_oa_location") or {}
389
- url_for_pdf = loc.get("url_for_pdf") or loc.get("url")
390
- if url_for_pdf and url_for_pdf.lower().endswith(".pdf"):
391
- return url_for_pdf
392
- # 有些 OA 链接是落在 landing page,再尝试从记录的所有位置挑选 pdf
393
- for k in ("oa_locations", "oa_location"):
394
- entries = data.get(k) or []
395
- if isinstance(entries, dict):
396
- entries = [entries]
397
- for e in entries:
398
- u = e.get("url_for_pdf") or e.get("url")
399
- if u and ".pdf" in u.lower():
400
- return u
401
- except Exception as e:
402
- print(f"[bold yellow]Unpaywall 查询异常: {e}")
403
- return None
404
-
405
-
406
- def _download_pdf_from_url(url: str, dest_path: Path, headers: dict | None = None) -> bool:
407
- """
408
- 给定合法的 PDF 下载 URL,下载保存到 dest_path。
409
- 返回 True/False 表示是否成功。
410
- """
411
- headers = headers or {"User-Agent": str(get_ua()), "Accept": "application/pdf"}
412
- try:
413
- with requests.get(url, headers=headers, stream=True, timeout=30) as r:
414
- if r.status_code != 200 or "application/pdf" not in r.headers.get("Content-Type", "").lower():
415
- # 仍可能是 PDF(某些服务器未正确设置头),尝试保存但标注提示
416
- if r.status_code != 200:
417
- print(f"[bold yellow]下载失败: HTTP {r.status_code}")
418
- return False
419
- dest_path.parent.mkdir(parents=True, exist_ok=True)
420
- with open(dest_path, "wb") as f:
421
- for chunk in r.iter_content(chunk_size=8192):
422
- if chunk:
423
- f.write(chunk)
424
- return True
425
- except Exception as e:
426
- print(f"[bold yellow]下载异常: {e}")
427
- return False
428
-
429
-
430
- def download5doi_via_unpaywall(
431
- store_path=None,
432
- doi_list=None,
433
- txt_file=None,
434
- excel_file=None,
435
- col_name=r"DOI",
436
- email: str | None = None,
437
- ):
438
- """
439
- 优先使用 Unpaywall 获取开放访问(OA)的 PDF 并下载,避免非合规站点。
440
-
441
- 参数:
442
- store_path: 保存目录
443
- doi_list/txt_file/excel_file/col_name: 同 download5doi
444
- email: 用于访问 Unpaywall API 的邮箱(必填,否则无法查询)
445
- """
446
- if not store_path:
447
- store_path = Path.cwd()
448
- else:
449
- store_path = Path(str(store_path))
450
- store_path.mkdir(parents=True, exist_ok=True)
451
-
452
- if doi_list:
453
- doi_list = ensure_list(doi_list)
454
- if txt_file:
455
- doi_list = _read_txt(txt_file)
456
- if excel_file:
457
- doi_list = _read_excel(excel_file, col_name)
458
-
459
- if not doi_list:
460
- print("[bold yellow]未提供 DOI 列表。")
461
- return
462
-
463
- print(f"[bold cyan]通过 Unpaywall 尝试下载 {len(doi_list)} 篇 OA PDF...")
464
- ok, miss = 0, 0
465
- for doi in track(doi_list, description="OA downloading..."):
466
- # 规范化文件名
467
- fname = re.sub(r'[/<>:"?*|]', "_", str(doi)) + ".pdf"
468
- dest = store_path / fname
469
- if dest.exists() and _get_file_size(dest, unit="KB") > 10:
470
- ok += 1
471
- continue
472
-
473
- pdf_url = _get_oa_pdf_url_from_unpaywall(str(doi), email=email)
474
- if not pdf_url:
475
- miss += 1
476
- print(f"[bold yellow]未找到 OA PDF: {doi}")
477
- continue
478
-
479
- if _download_pdf_from_url(pdf_url, dest):
480
- size_kb = _get_file_size(dest, unit="KB")
481
- if isinstance(size_kb, (int, float)) and size_kb < 10:
482
- dest.unlink(missing_ok=True)
483
- miss += 1
484
- print(f"[bold yellow]文件过小,疑似异常,已删除: {dest}")
485
- else:
486
- ok += 1
487
- print(f"[bold green]已下载: {dest}")
488
- else:
489
- miss += 1
490
-
491
- print(f"[bold]完成。成功 {ok} 篇,未获取 {miss} 篇(可能无 OA 版本或需机构访问)。")
492
-
493
-
494
371
  if __name__ == "__main__":
495
372
  store_path = r"F:\AAA-Delete\DOI_Reference\5\pdf"
496
373
  excel_file = r"F:\AAA-Delete\DOI_Reference\5\savedrecs.xls"
@@ -54,10 +54,15 @@ def query_queue(need_node=1, queue_list =['dcu','bigmem','cpu_parallel','cpu_sin
54
54
  queue_dict = get_queue_node()
55
55
  hs = None
56
56
  for my_queue in queue_list:
57
- if my_queue in queue_dict and queue_dict[my_queue] >= need_node:
58
- # slurm_file = f'../run.slurm.{my_queue}'
59
- hs = my_queue
60
- break
57
+ if my_queue == 'cpu_parallel':
58
+ for mq in ['cpu_parallel','cpu_parallel*']:
59
+ if mq in queue_dict and queue_dict[mq] >= need_node:
60
+ hs = 'cpu_parallel'
61
+ break
62
+ else:
63
+ if my_queue in queue_dict and queue_dict[my_queue] >= need_node:
64
+ hs = my_queue
65
+ break
61
66
  return hs
62
67
 
63
68
  def running_jobs():
@@ -68,11 +73,18 @@ def running_jobs():
68
73
  ids = [job.split()[0] for job in Jobs if job != '']
69
74
  return ids
70
75
 
71
- def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need_node=1, queue_tmp='<queue_name>', queue_list=['dcu', 'bigmem', 'cpu_parallel', 'cpu_single'], max_job=38):
76
+ def submit_job(working_dir=None, script_tmp='run.slurm', script_run='run.slurm', need_node=1, queue_tmp='<queue_name>', queue_list=['dcu', 'bigmem', 'cpu_parallel', 'cpu_single'], max_job=38, wait=False):
77
+ '''提交任务到集群,并返回任务ID'''
72
78
  from .oa_file import replace_content
73
79
  import datetime
80
+ if working_dir is None:
81
+ working_dir = os.getcwd()
74
82
  os.chdir(working_dir)
75
83
  print(f'切换工作目录到: {working_dir}')
84
+
85
+ if need_node > 1 and 'cpu_single' in queue_list:
86
+ queue_list.remove('cpu_single')
87
+
76
88
  while True:
77
89
  running_job = running_jobs()
78
90
  if not running_job or len(running_job) < max_job:
@@ -94,6 +106,7 @@ def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need
94
106
  time.sleep(30)
95
107
  else:
96
108
  print(f'提交任务成功,{content_sub.strip()}')
109
+ job_id = content_sub.strip().split()[-1]
97
110
  break
98
111
  else:
99
112
  print('没有足够的计算资源,等待30秒后重试!')
@@ -103,6 +116,20 @@ def submit_job(working_dir, script_tmp='run.slurm', script_run='run.slurm', need
103
116
  time.sleep(60)
104
117
  print(f'等待10秒后,继续检查任务状态!')
105
118
  time.sleep(10)
119
+
120
+ if wait:
121
+ while True:
122
+ if job_id in running_jobs():
123
+ print(f'Time: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
124
+ print(f'任务{job_id}正在队列中...')
125
+ time.sleep(60)
126
+ else:
127
+ print(f'任务{job_id}已完成!')
128
+ break
129
+ else:
130
+ print(f'任务{job_id}已提交,不等待其完成,继续执行后续操作!')
131
+
132
+ return job_id
106
133
 
107
134
  if __name__ == "__main__":
108
135
  pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oafuncs
3
- Version: 0.0.98.46
3
+ Version: 0.0.98.49
4
4
  Summary: Oceanic and Atmospheric Functions
5
5
  Home-page: https://github.com/Industry-Pays/OAFuncs
6
6
  Author: Kun Liu
@@ -18,7 +18,7 @@ URL = "https://github.com/Industry-Pays/OAFuncs"
18
18
  EMAIL = "liukun0312@stu.ouc.edu.cn"
19
19
  AUTHOR = "Kun Liu"
20
20
  REQUIRES_PYTHON = ">=3.10.0" # 2025/03/13
21
- VERSION = "0.0.98.46"
21
+ VERSION = "0.0.98.49"
22
22
 
23
23
  # What packages are required for this module to be executed?
24
24
  REQUIRED = [
File without changes
File without changes
File without changes
File without changes