pycompss-cli 3.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pycompss_cli/__init__.py +16 -0
- pycompss_cli/assets/__init__.py +16 -0
- pycompss_cli/assets/enqueue_compss_args.txt +317 -0
- pycompss_cli/assets/runcompss_args.txt +188 -0
- pycompss_cli/cli/__init__.py +16 -0
- pycompss_cli/cli/compss.py +27 -0
- pycompss_cli/cli/dislib.py +28 -0
- pycompss_cli/cli/pycompss.py +49 -0
- pycompss_cli/core/__init__.py +16 -0
- pycompss_cli/core/actions.py +173 -0
- pycompss_cli/core/actions_dispatcher.py +113 -0
- pycompss_cli/core/arguments.py +313 -0
- pycompss_cli/core/cmd_helpers.py +56 -0
- pycompss_cli/core/docker/__init__.py +16 -0
- pycompss_cli/core/docker/actions.py +241 -0
- pycompss_cli/core/docker/arguments.py +65 -0
- pycompss_cli/core/docker/cmd.py +636 -0
- pycompss_cli/core/local/__init__.py +16 -0
- pycompss_cli/core/local/actions.py +338 -0
- pycompss_cli/core/local/arguments.py +51 -0
- pycompss_cli/core/local/cmd.py +464 -0
- pycompss_cli/core/remote/__init__.py +16 -0
- pycompss_cli/core/remote/actions.py +443 -0
- pycompss_cli/core/remote/arguments.py +215 -0
- pycompss_cli/core/remote/cmd.py +217 -0
- pycompss_cli/core/remote/interactive_sc/__init__.py +16 -0
- pycompss_cli/core/remote/interactive_sc/core.py +342 -0
- pycompss_cli/core/remote/interactive_sc/defaults.py +132 -0
- pycompss_cli/core/remote/job_scripts/__init__.py +16 -0
- pycompss_cli/core/remote/job_scripts/cancel.py +77 -0
- pycompss_cli/core/remote/job_scripts/commons.py +273 -0
- pycompss_cli/core/remote/job_scripts/find.py +74 -0
- pycompss_cli/core/remote/job_scripts/info.py +103 -0
- pycompss_cli/core/remote/job_scripts/status.py +67 -0
- pycompss_cli/core/unicore/__init__.py +21 -0
- pycompss_cli/core/unicore/actions.py +317 -0
- pycompss_cli/core/unicore/arguments.py +69 -0
- pycompss_cli/core/utils.py +143 -0
- pycompss_cli/models/__init__.py +16 -0
- pycompss_cli/models/app.py +22 -0
- pycompss_cli-3.3.6.dist-info/LICENSE.txt +202 -0
- pycompss_cli-3.3.6.dist-info/METADATA +138 -0
- pycompss_cli-3.3.6.dist-info/RECORD +46 -0
- pycompss_cli-3.3.6.dist-info/WHEEL +5 -0
- pycompss_cli-3.3.6.dist-info/entry_points.txt +4 -0
- pycompss_cli-3.3.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,636 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
# Copyright 2002-2025 Barcelona Supercomputing Center (www.bsc.es)
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
#
|
|
17
|
+
import io
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import sys
|
|
21
|
+
import tarfile
|
|
22
|
+
import tempfile
|
|
23
|
+
from uuid import uuid4
|
|
24
|
+
import subprocess
|
|
25
|
+
|
|
26
|
+
from pycompss_cli.core.cmd_helpers import command_runner
|
|
27
|
+
|
|
28
|
+
# ################ #
|
|
29
|
+
# GLOBAL VARIABLES #
|
|
30
|
+
# ################ #
|
|
31
|
+
|
|
32
|
+
# Base names of the containers/service handled by this module.
master_name = "pycompss-master"
worker_name = "pycompss-worker"
service_name = "pycompss-service"

# Paths inside the container; every derived path is rooted at default_workdir.
default_workdir = "/home/user/"
default_cfg_file = "cfg"
default_image_file = "image"
default_worker_workdir = f"{default_workdir}.COMPSsWorker"
default_cfg = f"{default_workdir}{default_cfg_file}"
default_image = f"{default_workdir}{default_image_file}"


IMAGE_NAME = "compss/compss:3.3.3"  # Update when releasing new version

# The docker SDK is optional at import time; DOCKER_AVAILABALE records
# whether it could be imported (the spelling is part of the module API).
try:
    import docker
    from docker.types import Mount
    from docker.errors import DockerException
    from docker.models.containers import Container
except ImportError:
    DOCKER_AVAILABALE = False
else:
    DOCKER_AVAILABALE = True
|
|
53
|
+
|
|
54
|
+
# ############# #
|
|
55
|
+
# API FUNCTIONS #
|
|
56
|
+
# ############# #
|
|
57
|
+
|
|
58
|
+
class DockerClient():
    """ Proxy around the client returned by ``docker.from_env()``.

    Creating the proxy never fails: when the ``docker`` package could not be
    imported, or ``docker.from_env()`` raises ``DockerException``, the reason
    is stored. It is printed to stderr on the first attribute access, which
    then terminates the process with exit status 1.
    """

    def __init__(self):
        self._client = None
        self._error_message = None

        if not DOCKER_AVAILABALE:
            self._error_message = 'ERROR: Pip package `docker` is required for creating docker environments.'
            # ``docker`` and ``DockerException`` are unbound when the import
            # failed; touching them below would raise NameError.
            return

        try:
            self._client = docker.from_env()
        except DockerException:
            self._error_message = '''ERROR: Docker service is not running\n\tPlease, start docker service and try again'''

    def __getattribute__(self, name):
        # Every attribute access is forwarded to the wrapped client, so the
        # proxy's own fields must be read through the base implementation.
        client = super().__getattribute__('_client')

        if client is None:
            print(super().__getattribute__('_error_message'), file=sys.stderr)
            sys.exit(1)

        return getattr(client, name)
|
|
80
|
+
|
|
81
|
+
class ErrorContainerNotRunning(Exception):
    """ Raised when a required container is not among the running ones. """
|
|
83
|
+
|
|
84
|
+
class DockerCmd(object):
|
|
85
|
+
def __init__(self, env_id) -> None:
    """ Binds the docker commands to one environment.

    :param env_id: Environment identifier. It is appended to the base master
                   and worker names to build this environment's container
                   names.
    :returns: None
    """
    super().__init__()

    self.env_id = env_id
    self.client = DockerClient()

    self.master_name = "-".join((master_name, self.env_id))
    self.worker_name = "-".join((worker_name, self.env_id))
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def __get_image_name(self):
|
|
97
|
+
if os.environ.get("DEFAULT_DISLIB_DOCKER_IMAGE") is not None:
|
|
98
|
+
# This environment variable will be defined by the dislib script.
|
|
99
|
+
# It can be overriden by the COMPSS_DOCKER_IMAGE or the -i flag
|
|
100
|
+
# when running init.
|
|
101
|
+
return os.environ["DEFAULT_DISLIB_DOCKER_IMAGE"]
|
|
102
|
+
elif os.environ.get("COMPSS_DOCKER_IMAGE") is not None:
|
|
103
|
+
# If specified in an environment variable, take it
|
|
104
|
+
return os.environ["COMPSS_DOCKER_IMAGE"]
|
|
105
|
+
elif len(self.client.containers.list(filters={"name": self.master_name})) > 0:
|
|
106
|
+
# Condition equivalent to: is_running(master_name):
|
|
107
|
+
# But since it is undefined yet, we do it explicitly.
|
|
108
|
+
# If exists in the file (means that has been defined with init)
|
|
109
|
+
master = self.client.containers.list(filters={"name": self.master_name})[0]
|
|
110
|
+
# Command equivalent to: master = _get_master()
|
|
111
|
+
# But since it is undefined yet, we do it explicitly.
|
|
112
|
+
|
|
113
|
+
return master.image.attrs["Id"][7:7+12]
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def docker_deploy_compss(self, working_dir: str,
                         log_dir: str,
                         image: str = "",
                         restart: bool = True,
                         privileged: bool = False,
                         update_image: bool = False) -> None:
    """ Starts the main COMPSs image in Docker.
    It stops any existing one since it can not coexist with itself.

    :param working_dir: Given working directory (bind-mounted into the
                        container through ``_get_mounts``).
    :param log_dir: Log directory handed to ``_get_mounts``.
    :param image: Given docker image. When empty, the image is resolved
                  with ``__get_image_name``.
    :param restart: Force stop the existing and start a new one. An existing
                    master container is removed even when this is False.
    :param privileged: Forwarded to ``containers.run``.
    :param update_image: Run ``docker pull`` for the image before starting.
    :returns: None
    """
    docker_image = image if image else self.__get_image_name()

    masters = self.client.containers.list(filters={"name": self.master_name},
                                          all=True)

    assert len(masters) < 2  # never should we run 2 masters

    if restart or self._exists(self.master_name):
        self.docker_kill_compss(False)

    if self.is_running(self.master_name):
        return

    print("Starting %s container in dir %s" % (self.master_name, working_dir))
    print("If this is your first time running PyCOMPSs it may take a " +
          "while because it needs to download the docker image. " +
          "Please be patient.")
    if update_image:
        # Argument list, no shell: the image name is passed verbatim and is
        # never interpreted as shell syntax.
        subprocess.run(["docker", "pull", docker_image])
    mounts = self._get_mounts(user_working_dir=working_dir, log_dir=log_dir)
    ports = {"8888/tcp": 8888,  # required for jupyter notebooks
             "8080/tcp": 8080}  # required for monitor
    container = self.client.containers.run(image=docker_image,
                                           name=self.master_name,
                                           mounts=mounts, detach=True,
                                           ports=ports,
                                           privileged=privileged)
    self._generate_resources_cfg(ips=["localhost"])
    self._generate_project_cfg(ips=["localhost"])

    # Link the host path of the working directory to the mounted directory
    # inside the container. List form keeps paths with spaces intact.
    container.exec_run(["mkdir", "-p", os.path.dirname(working_dir)])
    container.exec_run(["ln", "-s", default_workdir, working_dir])

    # don't pass configs because they need to be overwritten when adding
    # new nodes
    # json.dumps escapes quotes/backslashes in the path; the separators keep
    # the compact layout of the previous hand-built string.
    cfg_content = json.dumps({"working_dir": working_dir,
                              "resources": "",
                              "project": ""},
                             separators=(",", ":"))
    _, cfg_file = self._store_temp_cfg(cfg_content)
    self._copy_file(cfg_file, default_cfg)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def docker_start_compss(self):
    """ Starts the existing master container of this environment.

    :returns: None
    """
    master = self.client.containers.get(self.master_name)
    master.start()
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def docker_update_image(self) -> None:
    """ Updates the default docker image.

    Pulls the image named by the COMPSS_DOCKER_IMAGE environment variable
    when it is defined, ``compss/compss:latest`` otherwise.

    :returns: None
    """
    docker_image = "compss/compss:latest"
    print("Updating docker image: %s" % docker_image)
    env_image = os.environ.get("COMPSS_DOCKER_IMAGE")
    if env_image is None:
        print("COMPSS_DOCKER_IMAGE is unset or empty. Updating default docker image: %s" %  # noqa: E501
              docker_image)
    else:
        docker_image = env_image
        print("Found COMPSS_DOCKER_IMAGE environment variable: %s. Updating." %
              docker_image)
    command_runner(["docker", "pull", docker_image])
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def docker_kill_compss(self, clean: bool = True) -> None:
    """ Stops all COMPSs images in Docker.

    Removes the master and worker containers of this environment by name.

    :param clean: Force clean the generated files (the cfg file inside the
                  master container) before removing the containers.
    :returns: None
    """
    if clean:
        # Clean the cfg file
        try:
            self._remove_cfg(self._get_master())
        except ErrorContainerNotRunning:
            print("WARNING: No master container running.")

    for container_name in (self.master_name, self.worker_name):
        self._stop_by_name(container_name)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def docker_exec_in_daemon(self, cmd: str, return_output=False, return_stream=False):
    """ Execute the given command in the main COMPSs image in Docker.

    The master container is started first when it is not running.

    :param cmd: Command to execute.
    :param return_output: Return the last chunk of the output, decoded and
                          stripped ("" when the command printed nothing).
    :param return_stream: Return the output stream itself without consuming
                          it. Only considered when return_output is False.
    :returns: See return_output / return_stream. None otherwise; in that
              case every chunk of the output is printed.
    """
    if not self.is_running(self.master_name):
        self.docker_start_compss()

    master = self._get_master()
    _, output = master.exec_run(cmd, workdir=default_workdir, stream=True)

    if return_output:
        chunks = list(output)
        # A command without output used to raise IndexError here.
        return chunks[-1].decode().strip() if chunks else ""
    if return_stream:
        return output
    try:
        for line in output:
            print(line.strip().decode())
    except KeyboardInterrupt:
        # Ctrl-C while streaming: run the cleanup command in the container.
        master.exec_run('compss_clean_procs')
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def docker_start_monitoring(self) -> None:
    """ Starts the COMPSs monitoring within the Docker instance.

    The master container is started first when it is not running. The
    output of the monitor init script is printed line by line.

    :returns: None
    """
    print("Starting Monitor")
    if not self.is_running(self.master_name):
        # The class has no start_daemon(); docker_start_compss() is the
        # method that starts the master container.
        self.docker_start_compss()

    cmd = "/etc/init.d/compss-monitor start"
    master = self._get_master()
    env = {"COMPSS_MONITOR": str(default_workdir) + "/.COMPSs"}
    _, output = master.exec_run(cmd,
                                environment=env,
                                workdir=default_workdir,
                                stream=True)
    for line in output:
        print(line.strip().decode())
    print("Please, open: http://127.0.0.1:8080/compss-monitor")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def docker_stop_monitoring(self) -> None:
    """ Stops the COMPSs monitoring within the Docker instance.

    Runs the monitor init script with ``stop`` in the master container and
    prints its output line by line.

    :returns: None
    """
    print("Stopping Monitor")
    master = self._get_master()
    _, stream = master.exec_run("/etc/init.d/compss-monitor stop",
                                workdir=default_workdir,
                                stream=True)
    for chunk in stream:
        print(chunk.strip().decode())
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def docker_components(self, option: str = "list",
                      resource: str = "worker",
                      value: str = "1") -> None:
    """ Performs actions over the COMPSS docker instances deployed.

    :param option: Option to perform (supported: list, add and remove)
    :param resource: Element to add or remove (not needed for list). Only
                     "worker" is supported.
    :param value: Amount of elements to add or remove when it is all digits;
                  any other value is handled as a custom worker
                  (not needed for list).
    :returns: None
    :raises Exception: On an unsupported option or resource.
    """
    if option == "list":
        list_running = self.client.containers.list
        masters = list_running(filters={"name": self.master_name})
        workers = list_running(filters={"name": self.worker_name})
        for container in masters + workers:
            print(container.name)
        return

    if option == "add":
        if resource != "worker":
            raise Exception("Unsupported resource to be added: " + resource)
        if value.isdigit():
            self._add_workers(int(value))
        else:
            self._add_custom_worker(value)
        return

    if option == "remove":
        if resource != "worker":
            raise Exception("Unsupported resource to be removed: " + resource)
        if value.isdigit():
            self._remove_workers(int(value))
        else:
            self._remove_custom_worker(value)
        return

    raise Exception("Unexpected components option: " + option)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def is_running(self, name: str = master_name) -> bool:
    """ Checks if a docker instance is running.

    :param name: Instance name used as the ``name`` filter of the query.
    :returns: True if the query over running containers returns at least
              one container. False otherwise.
    """
    running = self.client.containers.list(filters={"name": name})
    return bool(running)
|
|
313
|
+
|
|
314
|
+
def exists(self, name: str = None) -> bool:
    """ Checks if a docker instance exists (running or not).

    :param name: Instance name. Defaults to the master container of this
                 environment.
    :returns: True if exists. False otherwise.
    """
    if name is None:
        # Use this environment's master name; the bare base name is shared
        # by the master names of every environment.
        name = self.master_name
    found = self.client.containers.list(all=True, filters={"name": name})
    return bool(found)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# ################# #
|
|
327
|
+
# PRIVATE FUNCTIONS #
|
|
328
|
+
# ################# #
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _exists(self, name: str) -> bool:
|
|
333
|
+
""" Checks if a docker instance exists.
|
|
334
|
+
|
|
335
|
+
:param name: Instance name.
|
|
336
|
+
:returns: True if exists. False otherwise.
|
|
337
|
+
"""
|
|
338
|
+
cs = self.client.containers.list(filters={"name": name}, all=True)
|
|
339
|
+
return len(cs) > 0
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def _get_master(self):
|
|
343
|
+
""" Retrieve the COMPSs master container object.
|
|
344
|
+
|
|
345
|
+
:returns: Master container object.
|
|
346
|
+
"""
|
|
347
|
+
try:
|
|
348
|
+
master = self.client.containers.list(filters={"name": master_name})[0]
|
|
349
|
+
except IndexError:
|
|
350
|
+
raise ErrorContainerNotRunning(master_name)
|
|
351
|
+
return master
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _get_workers(self) -> list:
    """ Retrieve the COMPSs worker containers objects.

    Queries the running containers with the base worker name as ``name``
    filter.

    :returns: List of the Worker containers objects (empty when there is
              none).
    """
    # The former ``except IndexError`` was unreachable: nothing is indexed
    # here, an empty query result is simply an empty list.
    return self.client.containers.list(filters={"name": worker_name})
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _get_worker_ips(self) -> list:
    """ Retrieve the COMPSs worker containers IP address.

    The address is read from the ``bridge`` network settings of every
    running container returned for the base worker name.

    :returns: List of the Worker containers IP address.
    """
    addresses = []
    for container in self.client.containers.list(filters={"name": worker_name}):
        networks = container.attrs["NetworkSettings"]["Networks"]
        addresses.append(networks["bridge"]["IPAddress"])
    return addresses
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _store_temp_cfg(self, cfg_content: str) -> tuple:
    """ Stores the given content in the temporary cfg file.

    A new temporary directory is created on every call; it is not removed
    here.

    :param cfg_content: Cfg file contents.
    :returns: The tmp file path and the cfg file name.
    """
    holder_dir = tempfile.mkdtemp()
    target = os.path.join(holder_dir, default_cfg_file)
    with open(target, "w") as handle:
        handle.write(cfg_content)
    return holder_dir, target
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _copy_file(self, src: str, dst: str) -> None:
|
|
390
|
+
""" Copy the given file to the given destination within the COMPSs docker
|
|
391
|
+
master instance.
|
|
392
|
+
|
|
393
|
+
:param src: Source file path.
|
|
394
|
+
:param dst: Destination file path within the docker instance.
|
|
395
|
+
:returns: None
|
|
396
|
+
"""
|
|
397
|
+
master = self._get_master()
|
|
398
|
+
os.chdir(os.path.dirname(src))
|
|
399
|
+
src_name = os.path.basename(src)
|
|
400
|
+
tar_name = src + ".tar"
|
|
401
|
+
tar = tarfile.open(tar_name, mode="w")
|
|
402
|
+
try:
|
|
403
|
+
tar.add(src_name)
|
|
404
|
+
finally:
|
|
405
|
+
tar.close()
|
|
406
|
+
data = open(tar_name, "rb").read()
|
|
407
|
+
output = master.put_archive(os.path.dirname(dst), data)
|
|
408
|
+
if not output:
|
|
409
|
+
print("ERROR COPYING " + str(src) +
|
|
410
|
+
" TO " + src(dst) +
|
|
411
|
+
" OF MASTER CONTAINER!!!")
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def _get_mounts(self, user_working_dir: str, log_dir: str = None) -> list:
    """ Retrieve the list of folders to be mounted. It gets the Mount object
    from the given user working directory, and can include any other needed
    folder.

    :param user_working_dir: User working path, mounted on the default
                             working directory of the container.
    :param log_dir: Host directory for the logs, mounted on /root/.COMPSs.
                    "/.COMPSs" is appended unless the path already contains
                    ".COMPSs". Defaults to user_working_dir, so callers that
                    only know the working directory keep working.
    :returns: List of docker Mount objects.
    """
    if log_dir is None:
        log_dir = user_working_dir

    # mount target dir needs to be absolute
    user_dir = Mount(target=default_workdir,
                     source=user_working_dir,
                     type="bind")
    # WARNING: mounting .COMPSs makes it fail
    if '.COMPSs' not in log_dir:
        log_dir = log_dir + "/.COMPSs"
    # Created up front so that the source of the bind mount exists.
    os.makedirs(log_dir, exist_ok=True)

    compss_log_dir = Mount(target="/root/.COMPSs",
                           source=log_dir,
                           type="bind")
    return [user_dir, compss_log_dir]
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _generate_project_cfg(self, ips: list = (), cpus: int = 4,
                          install_dir: str = "/opt/COMPSs",
                          worker_dir: str = default_worker_workdir) -> str:
    """ Generates the project.xml according to the given parameters.

    Runs generate_project.sh inside the master container. When the script
    fails, its exit code and output are printed and the process exits with
    that code.

    :param ips: List of ip of the worker nodes.
    :param cpus: Number of cores per worker node.
    :param install_dir: COMPSs installation directory.
    :param worker_dir: Worker working directory.
    :returns: The space separated worker descriptions passed to the script.
    """
    # ./generate_project.sh project.xml "172.17.0.3:4:/opt/COMPSs:/tmp"
    script = "/opt/COMPSs/Runtime/scripts/system/xmls/generate_project.sh"
    master = self._get_master()
    master_entry = ":".join(("127.0.0.1", "0", install_dir, worker_dir))
    worker_entries = " ".join(
        f"{ip}:{cpus}:{install_dir}:{worker_dir}" for ip in ips)
    status, raw = master.exec_run(
        cmd=f"{script} /project.xml '{master_entry}' '{worker_entries}'")
    if status != 0:
        print("Exit code: %s" % status)
        for text in raw.decode().split("\n"):
            print(text)
        sys.exit(status)
    return worker_entries
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def _generate_resources_cfg(self, ips: list = (), cpus: int = 4) -> str:
|
|
470
|
+
""" Generates the resources.xml according to the given parameters.
|
|
471
|
+
|
|
472
|
+
:param ips: List of ip of the worker nodes.
|
|
473
|
+
:param cpus: Number of cores per worker node.
|
|
474
|
+
:returns: The cfg file contents for the resources.
|
|
475
|
+
"""
|
|
476
|
+
# ./generate_resources.sh resources.xml "172.17.0.3:4"
|
|
477
|
+
master = self._get_master()
|
|
478
|
+
res_cmd = "/opt/COMPSs/Runtime/scripts/system/xmls/generate_resources.sh"
|
|
479
|
+
res_arg = " ".join(["%s:%s" % (ip, cpus) for ip in ips])
|
|
480
|
+
cmd = "%s /resources.xml '%s'" % (res_cmd, res_arg)
|
|
481
|
+
exit_code, output = master.exec_run(cmd=cmd)
|
|
482
|
+
if exit_code != 0:
|
|
483
|
+
print("Exit code: %s" % exit_code)
|
|
484
|
+
for line in [l for l in output.decode().split("\n")]:
|
|
485
|
+
print(line)
|
|
486
|
+
sys.exit(exit_code)
|
|
487
|
+
return res_arg
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def _get_cfg(self, master) -> dict:
    """ Retrieve the cfg file contents as dictionary.

    The file is read with ``cat`` inside the container; the exit code of
    the command is not checked.

    :param master: Master docker instance object.
    :returns: CFG file contents as dictionary.
    """
    _, raw = master.exec_run(cmd="cat " + default_cfg)
    return json.loads(raw.decode())
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def _remove_cfg(self, master) -> None:
    """ Remove the cfg file.

    :param master: Master docker instance object.
    :returns: None
    """
    # The space after "-f" matters: without it the path was glued to the
    # option and the file was never removed. The result is not checked
    # (best-effort cleanup, as before).
    master.exec_run(cmd="rm -f " + default_cfg)
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _update_cfg(self, master, cfg: dict, ips, cpus) -> None:
    """ Update the cfg file with the given parameters.

    Regenerates project.xml and resources.xml for the given workers and
    copies a new cfg file to the master container.

    :param master: Master docker instance object (not used here).
    :param cfg: Current cfg contents; only "working_dir" is carried over.
    :param ips: List of IP addresses of the worker nodes.
    :param cpus: Number of cores per node.
    :returns: None
    """
    # Generate project.xml
    new_proj_cfg = self._generate_project_cfg(ips=ips, cpus=cpus)
    # Generate resources.xml
    new_res_cfg = self._generate_resources_cfg(ips=ips, cpus=cpus)
    # Update the cfg_content. json.dumps escapes quotes and backslashes in
    # the values; the separators keep the previous compact layout.
    cfg_content = json.dumps({"working_dir": cfg["working_dir"],
                              "resources": new_res_cfg,
                              "project": new_proj_cfg},
                             separators=(",", ":"))
    _, cfg_file = self._store_temp_cfg(cfg_content)
    self._copy_file(cfg_file, default_cfg)
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
def _add_custom_worker(self, custom_cfg: str) -> None:
|
|
535
|
+
""" Add custom worker to the cfg file.
|
|
536
|
+
* custom_cfg = "ip:cpus"
|
|
537
|
+
|
|
538
|
+
:param custom_cfg: Existing custom cfg file.
|
|
539
|
+
:returns: None
|
|
540
|
+
"""
|
|
541
|
+
ip, cpus = custom_cfg.split(":")
|
|
542
|
+
master = self._get_master()
|
|
543
|
+
cfg = self._get_cfg(master)
|
|
544
|
+
# try to copy the master working dir to custom worker
|
|
545
|
+
os.system("scp -r %s %s:/tmp" % (cfg["working_dir"], ip))
|
|
546
|
+
ips = self._get_worker_ips()
|
|
547
|
+
ips.append(ip)
|
|
548
|
+
self._update_cfg(master, cfg, ips, cpus)
|
|
549
|
+
print("Connected worker %s\n\tCPUs: %s" % (ip, cpus))
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def _remove_custom_worker(self, custom_cfg: str) -> None:
|
|
553
|
+
""" Remove custom worker from the cfg file.
|
|
554
|
+
* custom_cfg = "ip:cpus"
|
|
555
|
+
|
|
556
|
+
:param custom_cfg: Existing custom cfg file.
|
|
557
|
+
:returns: None
|
|
558
|
+
"""
|
|
559
|
+
ip, cpus = custom_cfg.split(":")
|
|
560
|
+
master = self._get_master()
|
|
561
|
+
cfg = self._get_cfg(master)
|
|
562
|
+
# Find the worker with the given ip
|
|
563
|
+
workers = self._get_workers()
|
|
564
|
+
for w in workers:
|
|
565
|
+
w_ip = w.attrs["NetworkSettings"]["Networks"]["bridge"]["IPAddress"]
|
|
566
|
+
if w_ip == ip:
|
|
567
|
+
w.remove(force=True)
|
|
568
|
+
ips = self._get_worker_ips()
|
|
569
|
+
self._update_cfg(master, cfg, ips, cpus)
|
|
570
|
+
print("Removed worker %s" % (ip))
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def _add_workers(self, num_workers: int = 1,
|
|
574
|
+
user_working_dir: str = "",
|
|
575
|
+
cpus: int = 4) -> None:
|
|
576
|
+
""" Add COMPSs workers to the cfg file.
|
|
577
|
+
|
|
578
|
+
:param num_workers: Number of workers to e added.
|
|
579
|
+
:param user_working_dir: User working directory.
|
|
580
|
+
:returns: None
|
|
581
|
+
"""
|
|
582
|
+
master = self._get_master()
|
|
583
|
+
cfg = self._get_cfg(master)
|
|
584
|
+
mounts = self._get_mounts(user_working_dir=cfg["working_dir"])
|
|
585
|
+
for _ in range(num_workers):
|
|
586
|
+
worker_id = worker_name + "-" + uuid4().hex[:8]
|
|
587
|
+
self.client.containers.run(image=self._get_image_name(), name=worker_id,
|
|
588
|
+
mounts=mounts, detach=True, auto_remove=True)
|
|
589
|
+
ips = self._get_worker_ips()
|
|
590
|
+
self._update_cfg(master, cfg, ips, cpus)
|
|
591
|
+
print("Started %s worker/s\n\tWorking dir: %s\n\tCPUs: %s" %
|
|
592
|
+
(num_workers, user_working_dir, cpus))
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
def _remove_workers(self, num_workers: int = 1,
|
|
596
|
+
cpus: int = 4) -> None:
|
|
597
|
+
""" Removes COMPSs workers from the cfg file.
|
|
598
|
+
|
|
599
|
+
:param num_workers: Number of workers to e added.
|
|
600
|
+
:param cpus: Number of cores of the workers.
|
|
601
|
+
:returns: None
|
|
602
|
+
"""
|
|
603
|
+
master = self._get_master()
|
|
604
|
+
cfg = self._get_cfg(master)
|
|
605
|
+
workers = self._get_workers()
|
|
606
|
+
to_remove = workers[:num_workers]
|
|
607
|
+
for worker in to_remove:
|
|
608
|
+
worker.remove(force=True)
|
|
609
|
+
ips = self._get_worker_ips()
|
|
610
|
+
self._update_cfg(master, cfg, ips, cpus)
|
|
611
|
+
print("Removed " + str(num_workers) + " workers.")
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _stop_by_name(self, name: str) -> None:
|
|
615
|
+
""" Stop a docker instance by name.
|
|
616
|
+
|
|
617
|
+
:param name: Name of the instance to be removed.
|
|
618
|
+
:returns: None
|
|
619
|
+
"""
|
|
620
|
+
containers = self.client.containers.list(filters={"name": name}, all=True)
|
|
621
|
+
for c in containers:
|
|
622
|
+
c.remove(force=True)
|
|
623
|
+
|
|
624
|
+
def docker_copy_to_host(self, container_name, src, dst):
    """ Copy files from a container directory to a host directory.

    The data returned by ``get_archive`` for ``src`` is written to ``dst``
    chunk by chunk, as received.

    :param container_name: Name of the container.
    :param src: Source path.
    :param dst: Destination path.
    :returns: None
    """
    source = self.client.containers.get(container_name)
    chunks, _ = source.get_archive(src)
    with open(dst, 'wb') as target:
        target.writelines(chunks)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
# Copyright 2002-2025 Barcelona Supercomputing Center (www.bsc.es)
|
|
4
|
+
#
|
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
# you may not use this file except in compliance with the License.
|
|
7
|
+
# You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
# See the License for the specific language governing permissions and
|
|
15
|
+
# limitations under the License.
|
|
16
|
+
#
|