hpc-task 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hpc_task/__init__.py ADDED
File without changes
hpc_task/hpc.py ADDED
@@ -0,0 +1,147 @@
1
import os
import posixpath
import re
import shlex
import stat

import paramiko
from paramiko import SSHClient, SSHException
5
+
6
+
7
class HPCTask:
    """Drive one batch job on a remote HPC host over SSH.

    The class opens a paramiko SSH connection (optionally through a jump
    host), submits a job with ``bsub``, polls it with ``bjobs``, kills it
    with ``bkill`` and copies the files of ``workdir`` in both directions
    over SFTP.  The same ``workdir`` path is used locally and remotely.

    Attributes are plain and public, matching the original API:

    * ``workdir``    - working directory, used on both sides.
    * ``ssh_client`` - connected ``SSHClient`` or ``None`` when disconnected.
    * ``jobid``      - id parsed from the ``bsub`` output, or ``None``.
    """

    # Extracts the id from bsub output such as
    # "Job <688518> is submitted to queue <proj>."
    _JOBID_PATTERN = re.compile(r'Job <([^>]+)>')

    def __init__(self, workdir='.'):
        """Create a disconnected task bound to *workdir*."""
        self._ssh_jump = None
        self.jobid = None
        self.ssh_client = None
        self.workdir = workdir

    def __enter__(self):
        """Allow ``with HPCTask(...) as task:``; the connection is closed on exit."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _require_client(self):
        """Return the open SSH client, or raise if there is none."""
        if self.ssh_client is None:
            raise RuntimeError('ssh client is not connected')
        return self.ssh_client

    @staticmethod
    def _quote_remote_path(path):
        """Quote *path* for the remote shell while keeping a leading ``~``.

        ``shlex.quote`` alone would turn ``~/run`` into a literal directory
        named ``~``, so the tilde prefix is left outside the quotes.
        """
        if path == '~':
            return path
        if path.startswith('~/'):
            return '~/' + shlex.quote(path[2:])
        return shlex.quote(path)

    def connect(self, target_host, jump_host=None):
        """Open the SSH connection to the target host.

        Does nothing when a connection is already open.

        :param target_host: keyword arguments for ``SSHClient.connect``
            (``hostname`` is required when *jump_host* is used; ``port``
            defaults to 22 for the tunnel destination).
        :param jump_host: optional keyword arguments for
            ``SSHClient.connect`` describing a jump host; when given, the
            target is reached through a ``direct-tcpip`` channel opened on it.
        :raises Exception: whatever paramiko raises; both clients are closed
            before the exception propagates.
        """
        if self.ssh_client is not None:
            return

        jump_client = None
        target_client = SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; kept for compatibility with the original behavior.
        target_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            if jump_host is not None:
                # Connect to the jump host first.
                jump_client = SSHClient()
                jump_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
                jump_client.connect(**jump_host)
                # Open a tunnel from the jump host to the target server.
                transport = jump_client.get_transport()
                dest_addr = (target_host['hostname'], target_host.get('port', 22))
                local_addr = ('127.0.0.1', 0)  # any local port
                channel = transport.open_channel('direct-tcpip', dest_addr, local_addr)
                # Forward every option of target_host (password, key files,
                # timeouts, ...) and let paramiko talk over the tunnel.
                target_client.connect(**{**target_host, 'sock': channel})
            else:
                target_client.connect(**target_host)
        except Exception:
            # Do not leak half-open connections when any step fails.
            target_client.close()
            if jump_client is not None:
                jump_client.close()
            raise

        self._ssh_jump = jump_client
        self.ssh_client = target_client
        print("Connect to SSH success.")

    def prerun(self):
        """Submit the job script so that the job occupies its nodes.

        Creates ``workdir`` remotely, copies ``~/bin/hpc_job.chess`` into it
        and pipes it to ``bsub``.  The id printed by ``bsub`` is stored in
        ``self.jobid``.

        :return: the ``(stdin, stdout, stderr)`` triple of the remote
            command; ``stdout`` has already been consumed.
        :raises RuntimeError: if not connected, or if no job id is found in
            the ``bsub`` output.

        TODO: make the commands configurable to support common HPC schedulers.
        """
        client = self._require_client()
        workdir = self._quote_remote_path(self.workdir)
        commands = [
            # Abort when the working directory cannot be entered, otherwise
            # the job would be submitted from the wrong directory.
            f'mkdir -p {workdir} && cd {workdir} || exit 1',
            # cp may fail harmlessly when the script is already in place.
            'cp ~/bin/hpc_job.chess hpc_job.chess',
            'bsub < hpc_job.chess',
        ]
        stdin, stdout, stderr = client.exec_command(';'.join(commands))
        output = stdout.read().decode()
        match = self._JOBID_PATTERN.search(output)
        if match is None:
            reason = stderr.read().decode().strip() or output.strip()
            raise RuntimeError(f'job submission failed: {reason!r}')
        self.jobid = match.group(1)
        return stdin, stdout, stderr

    def postrun(self):
        """Release the nodes occupied by the job.

        ``bkill JOBID`` kills the job from its head process and the KILL
        signal is passed on to the children, whereas ``pkill gosh-remote``
        kills that process directly.  The two may or may not be equivalent,
        depending on how the job was defined at ``bsub`` time: usually
        ``bsub`` runs a script that calls gosh-remote, in which case they
        differ.  The calling script may handle signals and kill the
        gosh-remote process chain one by one; whether gosh-remote itself
        handles signals still has to be confirmed by experiment.

        :return: the ``(stdin, stdout, stderr)`` triple of the remote command.
        :raises RuntimeError: if not connected.

        TODO: make the commands configurable to support common HPC schedulers.
        """
        client = self._require_client()
        commands = ['sleep 1', 'pkill gosh-remote']
        if self.jobid is not None:
            # Without a submitted job there is nothing for bkill to act on.
            commands.append(f'bkill {self.jobid}')
        stdin, stdout, stderr = client.exec_command(';'.join(commands))
        return stdin, stdout, stderr

    @property
    def status(self):
        """Return the job state reported by ``bjobs`` (e.g. ``RUN``).

        ``"UNKNOWN"`` is returned when no job has been submitted or when the
        ``bjobs`` output does not look like a job line.

        :raises RuntimeError: if not connected.

        TODO: unify the status codes of different queueing systems.
        """
        client = self._require_client()
        if self.jobid is None:
            return "UNKNOWN"
        stdin, stdout, stderr = client.exec_command(f'bjobs -noheader {self.jobid}')
        # Example line:
        # 688559 renpeng RUN proj Khpcserver0 72*Knode44 scheduler Sep 8 10:55
        fields = stdout.read().decode().strip().split()
        if len(fields) > 5:
            return fields[2]
        return "UNKNOWN"

    def submit(self):
        """Placeholder; currently does nothing.

        :return: None
        """
        return None

    def upload(self):
        """Copy the regular files of the local ``workdir`` to the remote one.

        Sub-directories are skipped (recursion is not implemented yet).

        :return: None
        :raises RuntimeError: if not connected, or if the remote directory
            cannot be created.

        TODO: use rsync.
        """
        client = self._require_client()

        # Create the remote directory and wait for the command to finish,
        # otherwise the first put() can race with the mkdir.
        _, stdout, stderr = client.exec_command(
            f'mkdir -p {self._quote_remote_path(self.workdir)}')
        if stdout.channel.recv_exit_status() != 0:
            reason = stderr.read().decode().strip()
            raise RuntimeError(f'cannot create remote directory: {reason!r}')

        sftp_client = client.open_sftp()
        try:
            for filename in os.listdir(self.workdir):  # TODO: recurse into sub-directories
                local_path = os.path.join(self.workdir, filename)
                if os.path.isdir(local_path):
                    continue
                # Remote paths always use forward slashes, whatever the local OS.
                remote_path = posixpath.join(self.workdir, filename)
                sftp_client.put(local_path, remote_path, confirm=False)
        finally:
            sftp_client.close()
        return None

    def download(self):
        """Copy the files of the remote ``workdir`` into the local one.

        The local directory is created when missing; remote sub-directories
        are skipped (recursion is not implemented yet).

        :return: None
        :raises RuntimeError: if not connected.
        """
        client = self._require_client()

        sftp_client = client.open_sftp()
        try:
            os.makedirs(self.workdir, exist_ok=True)
            for entry in sftp_client.listdir_attr(self.workdir):  # TODO: recurse into sub-directories
                if entry.st_mode is not None and stat.S_ISDIR(entry.st_mode):
                    continue
                remote_path = posixpath.join(self.workdir, entry.filename)
                local_path = os.path.join(self.workdir, entry.filename)
                sftp_client.get(remote_path, local_path)
        finally:
            sftp_client.close()
        return None

    def close(self):
        """Close the SSH connections and mark the task as disconnected.

        Resetting the attributes lets ``connect`` be called again later and
        makes the other methods raise instead of using a closed client.
        """
        if self.ssh_client is not None:
            self.ssh_client.close()
            self.ssh_client = None
        if self._ssh_jump is not None:
            self._ssh_jump.close()
            self._ssh_jump = None
        return None
@@ -0,0 +1,40 @@
1
+ Metadata-Version: 2.4
2
+ Name: hpc_task
3
+ Version: 0.0.1
4
+ Summary: HPC Task python library.
5
+ Home-page: https://gitee.com/pjren/hpc_task
6
+ Author: Renpj
7
+ Author-email: 0403114076@163.com
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: paramiko
14
+ Dynamic: author
15
+ Dynamic: author-email
16
+ Dynamic: classifier
17
+ Dynamic: description
18
+ Dynamic: description-content-type
19
+ Dynamic: home-page
20
+ Dynamic: license
21
+ Dynamic: license-file
22
+ Dynamic: requires-dist
23
+ Dynamic: summary
24
+
25
+ # HPC Task
26
+
27
+ Python package for easy HPC task management based on paramiko.
28
+
29
+ ## Installation
30
+
31
+ pip install -U hpc_task
32
+
33
+ **Requirements**
34
+ * paramiko
35
+
36
+ ## Usage
37
+
38
+ See tests
39
+
40
+ ## TODO
@@ -0,0 +1,7 @@
1
+ hpc_task/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ hpc_task/hpc.py,sha256=8ExjM6yRHb5T1j5oWWd41LePeZ1Y-ynkJUk14XkC8zE,5691
3
+ hpc_task-0.0.1.dist-info/licenses/LICENSE,sha256=iEk8UVtFW_L7ddFiCE_WFUjbOCaVc5fwNapeDmeY0WA,1063
4
+ hpc_task-0.0.1.dist-info/METADATA,sha256=TPIh4yGwdVKy0cAVgm81ykNiFkYVU8q3eT3D82s7Cv4,760
5
+ hpc_task-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ hpc_task-0.0.1.dist-info/top_level.txt,sha256=6mflaHAkVGFi5mREsN5zh0KOO13pTNrTBRAAcwmHMVA,9
7
+ hpc_task-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 pj.ren
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ hpc_task