idmtools-platform-container 0.0.0.dev0__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docker_image/BASE_VERSION +1 -0
- docker_image/Dockerfile +48 -0
- docker_image/Dockerfile_buildenv +46 -0
- docker_image/ImageName +1 -0
- docker_image/README.md +78 -0
- docker_image/__init__.py +6 -0
- docker_image/build_docker_image.py +145 -0
- docker_image/debian/BASE_VERSION +1 -0
- docker_image/debian/Dockerfile +40 -0
- docker_image/debian/ImageName +1 -0
- docker_image/debian/README.md +48 -0
- docker_image/debian/pip.conf +3 -0
- docker_image/debian/requirements.txt +1 -0
- docker_image/docker_image_history.py +101 -0
- docker_image/pip.conf +3 -0
- docker_image/push_docker_image.py +62 -0
- docker_image/requirements.txt +1 -0
- docker_image/rocky_meta_runtime.txt +37 -0
- idmtools_platform_container/__init__.py +18 -8
- idmtools_platform_container/cli/__init__.py +5 -0
- idmtools_platform_container/cli/container.py +682 -0
- idmtools_platform_container/container_operations/__init__.py +5 -0
- idmtools_platform_container/container_operations/docker_operations.py +593 -0
- idmtools_platform_container/container_platform.py +375 -0
- idmtools_platform_container/platform_operations/__init__.py +5 -0
- idmtools_platform_container/platform_operations/experiment_operations.py +112 -0
- idmtools_platform_container/platform_operations/simulation_operations.py +58 -0
- idmtools_platform_container/plugin_info.py +79 -0
- idmtools_platform_container/utils/__init__.py +5 -0
- idmtools_platform_container/utils/general.py +136 -0
- idmtools_platform_container/utils/status.py +130 -0
- idmtools_platform_container-0.0.3.dist-info/METADATA +212 -0
- idmtools_platform_container-0.0.3.dist-info/RECORD +69 -0
- idmtools_platform_container-0.0.3.dist-info/entry_points.txt +5 -0
- idmtools_platform_container-0.0.3.dist-info/licenses/LICENSE.TXT +3 -0
- {idmtools_platform_container-0.0.0.dev0.dist-info → idmtools_platform_container-0.0.3.dist-info}/top_level.txt +2 -0
- tests/inputs/Assets/MyLib/functions.py +2 -0
- tests/inputs/__init__.py +0 -0
- tests/inputs/model.py +28 -0
- tests/inputs/model1.py +31 -0
- tests/inputs/model3.py +21 -0
- tests/inputs/model_file.py +18 -0
- tests/inputs/run.sh +1 -0
- tests/inputs/sleep.py +9 -0
- tests/test_container_cli/__init__.py +0 -0
- tests/test_container_cli/helper.py +57 -0
- tests/test_container_cli/test_base.py +14 -0
- tests/test_container_cli/test_cancel.py +96 -0
- tests/test_container_cli/test_clear_results.py +54 -0
- tests/test_container_cli/test_container.py +72 -0
- tests/test_container_cli/test_file_container_cli.py +121 -0
- tests/test_container_cli/test_get_detail.py +60 -0
- tests/test_container_cli/test_history.py +136 -0
- tests/test_container_cli/test_history_count.py +53 -0
- tests/test_container_cli/test_inspect.py +53 -0
- tests/test_container_cli/test_install.py +48 -0
- tests/test_container_cli/test_is_running.py +69 -0
- tests/test_container_cli/test_jobs.py +138 -0
- tests/test_container_cli/test_list_containers.py +99 -0
- tests/test_container_cli/test_packages.py +41 -0
- tests/test_container_cli/test_path.py +96 -0
- tests/test_container_cli/test_ps.py +47 -0
- tests/test_container_cli/test_remove_container.py +78 -0
- tests/test_container_cli/test_status.py +149 -0
- tests/test_container_cli/test_stop_container.py +71 -0
- tests/test_container_cli/test_sync_history.py +98 -0
- tests/test_container_cli/test_verify_docker.py +28 -0
- tests/test_container_cli/test_volume.py +28 -0
- idmtools_platform_container-0.0.0.dev0.dist-info/METADATA +0 -41
- idmtools_platform_container-0.0.0.dev0.dist-info/RECORD +0 -5
- {idmtools_platform_container-0.0.0.dev0.dist-info → idmtools_platform_container-0.0.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,593 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Here we implement the ContainerPlatform docker operations.
|
|
3
|
+
|
|
4
|
+
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
|
|
5
|
+
"""
|
|
6
|
+
import docker
|
|
7
|
+
import platform as sys_platform
|
|
8
|
+
import subprocess
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import List, Dict, NoReturn, Any, Union
|
|
11
|
+
from idmtools.core import ItemType
|
|
12
|
+
from idmtools_platform_container.utils.general import normalize_path, parse_iso8601
|
|
13
|
+
from idmtools_platform_file.tools.job_history import JobHistory
|
|
14
|
+
from docker.models.containers import Container
|
|
15
|
+
from docker.errors import NotFound as ErrorNotFound, ImageNotFound, DockerException
|
|
16
|
+
from docker.errors import APIError as DockerAPIError
|
|
17
|
+
from logging import getLogger, DEBUG
|
|
18
|
+
|
|
19
|
+
# `logger` is the module-level logger used for diagnostics in this file.
# `user_logger` is the logger named 'user'; presumably its records are the
# ones surfaced to the end user -- TODO confirm against the logging setup.
logger = getLogger(__name__)
user_logger = getLogger('user')

# Container states accepted by get_containers(); a container whose status is
# not listed here is filtered out.
# Only consider the containers that can be restarted
CONTAINER_STATUS = ['exited', 'running', 'paused']
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def validate_container_running(platform, **kwargs) -> str:
    """
    Make sure the platform image is available, then reuse or start a container.

    The image is pulled when it is not found locally. The containers reported by
    ``platform.retrieve_match_containers()`` are stopped first when
    ``platform.force_start`` is set. When no new container is requested
    (``platform.new_container`` is false and ``platform.container_prefix`` is None),
    the most recently started running container is reused, or else the most recently
    started stopped container is restarted. In every other case a new container is
    started through ``platform.start_container(**kwargs)``.

    Args:
        platform: Platform
        kwargs: keyword arguments used to expand functionality
    Returns:
        container id
    """
    # Check image exists
    if not check_local_image(platform.docker_image):
        user_logger.info(f"Image {platform.docker_image} does not exist, pull the image first.")
        succeeded = pull_docker_image(platform.docker_image)
        if not succeeded:
            user_logger.error(f"/!\\ ERROR: Failed to pull image {platform.docker_image}.")
            exit(-1)

    # User configuration
    if logger.isEnabledFor(DEBUG):
        logger.debug(f"User config: force_start={platform.force_start}")
        logger.debug(f"User config: new_container={platform.new_container}")
        logger.debug(f"User config: include_stopped={platform.include_stopped}")

    # Check containers: retrieve_match_containers() yields (status, container) pairs
    container_id = None
    container_match = platform.retrieve_match_containers()
    container_running = [container for status, container in container_match if status == 'running']
    container_stopped = [container for status, container in container_match if status != 'running']

    if logger.isEnabledFor(DEBUG):
        logger.debug(f"Found running matched containers: {container_running}")
        if platform.include_stopped:
            logger.debug(f"Found stopped matched containers: {container_stopped}")

    if platform.force_start:
        # A forced start discards every matching container, running or not
        if logger.isEnabledFor(DEBUG) and len(container_running) > 0:
            logger.debug(f"Stop all running containers {container_running}")
        stop_all_containers(container_running, keep_running=False)
        container_running = []

        if logger.isEnabledFor(DEBUG) and len(container_stopped) > 0 and platform.include_stopped:
            logger.debug(f"Stop all stopped containers {container_stopped}")
        stop_all_containers(container_stopped, keep_running=False)
        container_stopped = []

    if not platform.new_container and platform.container_prefix is None:
        if len(container_running) > 0:
            # Pick up the first running container (sorted newest first)
            container_running = sort_containers_by_start(container_running)
            container_id = container_running[0].short_id
            container = get_container(container_id)
            if container is None:
                # get_container() returns None when the lookup fails, e.g. the
                # container disappeared after it was listed; fall back to a new one.
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f"Existing container {container_id} is not usable")
                container_id = None
            elif sys_platform.system() not in ["Windows"]:
                # The container is only usable if the data mount lists at least 3 lines
                command = f"bash -c '[ \"$(ls -lart {platform.data_mount} | wc -l)\" -ge 3 ] && echo exists || echo not_exists'"
                result = container.exec_run(command)
                output = result.output.decode().strip()
                if output == "not_exists":
                    stop_container(container_id, remove=True)
                    if logger.isEnabledFor(DEBUG):
                        logger.debug(f"Existing container {container_id} is not usable")
                    container_id = None

            if container_id is not None:
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f"Pick running container {container_id}.")
        elif len(container_stopped) > 0:
            # Pick up the first stopped container and then restart it
            container_stopped = sort_containers_by_start(container_stopped)
            container = container_stopped[0]
            container.restart()
            container_id = container.short_id
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Pick and restart the stopped container {container.short_id}.")

    # Start the container
    if container_id is None:
        container_id = platform.start_container(**kwargs)
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Start container: {platform.docker_image}")
            logger.debug(f"New container ID: {container_id}")

    return container_id
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
#############################
|
|
110
|
+
# Check containers
|
|
111
|
+
#############################
|
|
112
|
+
|
|
113
|
+
def get_container(container_id) -> Any:
    """
    Look up a container object from its ID.

    Args:
        container_id: container id
    Returns:
        container object, or None when the container is not found or the
        docker API reports an error
    """
    docker_client = docker.from_env()

    try:
        return docker_client.containers.get(container_id)
    except ErrorNotFound:
        logger.debug(f"Container with ID {container_id} not found.")
    except DockerAPIError as e:
        logger.debug(f"Error retrieving container with ID {container_id}: {str(e)}")

    # Both failure paths end up here
    return None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def find_container_by_image(image: str, include_stopped: bool = False) -> Dict:
    """
    Keep only the containers created from the given image.

    Args:
        image: docker image
        include_stopped: bool, if consider the stopped containers or not
    Returns:
        dict mapping each status key of get_containers() to the containers
        whose configured image equals ``image``
    """
    grouped = get_containers(include_stopped)
    return {
        status: [ct for ct in candidates if image == ct.attrs['Config']['Image']]
        for status, candidates in grouped.items()
    }
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def stop_container(container: Union[str, Container], remove: bool = True) -> None:
    """
    Stop a container and optionally remove it.

    The process exits with code -1 when the container cannot be found or the
    docker API reports an error.

    Args:
        container: container id or container object to be stopped
        remove: bool, if remove the container or not
    Returns:
        None
    Raises:
        TypeError: if ``container`` is neither a str nor a Container
    """
    # Keep the caller's identifier for messages: `container` is rebound to the
    # resolved object below, so it can no longer tell how it was passed in.
    given_as_id = isinstance(container, str)
    requested = container if given_as_id else getattr(container, 'short_id', str(container))
    not_found_msg = (f"Container with ID {requested} not found." if given_as_id
                     else f"Container {requested} not found.")

    try:
        if given_as_id:
            container = get_container(container)
            if container is None:
                # get_container() reports a failed lookup by returning None
                # rather than raising, so handle it like the NotFound case.
                logger.debug(not_found_msg)
                exit(-1)
        elif not isinstance(container, Container):
            raise TypeError("Invalid container object.")

        # Stop the container
        if container.status == 'running':
            container.stop()
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Container {str(container)} has been stopped.")

        if remove:
            container.remove()
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Container {str(container)} has been removed.")
    except ErrorNotFound:
        logger.debug(not_found_msg)
        exit(-1)
    except DockerAPIError as e:
        logger.debug(f"Error stopping container {requested}: {str(e)}")
        exit(-1)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def stop_all_containers(containers: List[Union[str, Container]], keep_running: bool = True,
                        remove: bool = True) -> None:
    """
    Stop all containers.

    Args:
        containers: list of container id or containers to be stopped
        keep_running: bool, if True a running container that still reports
            running jobs is left untouched
        remove: bool, if remove the container or not
    Returns:
        None
    """
    for container in containers:
        if isinstance(container, str):
            # The signature accepts ids, but the checks below need the object
            resolved = get_container(container)
            if resolved is None:
                logger.debug(f"Container with ID {container} not found.")
                continue
            container = resolved

        if container.status == 'running' and keep_running:
            jobs = list_running_jobs(container.short_id)
            if jobs:
                # Busy container: keep it alive
                continue
        stop_container(container, remove=remove)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def restart_container(container: Union[str, Container]) -> None:
    """
    Restart a container.

    Any failure (container not found, invalid argument, docker error) is reported
    through the user logger and the process exits with code -1.

    Args:
        container: container id or container object to be restarted
    Returns:
        None
    """
    # Remember how the caller named the container. `container` is rebound below
    # and may be None or an arbitrary object by the time a message is built.
    requested = container if isinstance(container, str) else getattr(container, 'short_id', repr(container))

    try:
        if isinstance(container, str):
            container = get_container(container)
        elif not isinstance(container, Container):
            raise TypeError("Invalid container object.")

        if container is None:
            user_logger.error(f"Container {requested} not found.")
            exit(-1)

        # Restart the container
        container.restart()
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Container {container.short_id} has been restarted.")
    except DockerAPIError as e:
        user_logger.error(f"Error restarting container {requested}: {str(e)}")
        exit(-1)
    except Exception as e:
        # SystemExit raised by exit() above is not an Exception and passes through
        user_logger.error(f"Restarting container {requested} encounters an unexpected error: {e}")
        exit(-1)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def sort_containers_by_start(containers: List[Container], reverse: bool = True) -> List[Container]:
    """
    Order containers by their 'StartedAt' timestamp.

    Args:
        containers: list of containers
        reverse: bool, if sort in reverse order (newest first by default)
    Returns:
        new sorted list of containers; the input list is left unchanged
    """
    def _started_at(ct):
        return parse_iso8601(ct.attrs['State']['StartedAt'])

    ordered = list(containers)
    ordered.sort(key=_started_at, reverse=reverse)
    return ordered
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def get_containers(include_stopped: bool = False) -> Dict:
    """
    List the known containers, grouped into running and not running.

    A container is kept only when its status is in CONTAINER_STATUS and
    JobHistory.verify_container() accepts its short id.

    Args:
        include_stopped: bool, if consider the stopped containers or not
    Returns:
        dict with the keys 'running' and 'stopped'; 'stopped' holds every kept
        container whose status is not 'running'
    """
    docker_client = docker.from_env()
    running, stopped = [], []

    for ct in docker_client.containers.list(all=include_stopped):
        if ct.status not in CONTAINER_STATUS or not JobHistory.verify_container(ct.short_id):
            continue
        if ct.status == 'running':
            running.append(ct)
        else:
            stopped.append(ct)

    return {'running': running, 'stopped': stopped}
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def get_working_containers(container_id: str = None, entity: bool = False) -> List[Any]:
    """
    Collect the running containers, optionally restricted to one container ID.

    Args:
        container_id: Container ID
        entity: bool, if return the container object or container id
    Returns:
        list of working containers or IDs (empty when the requested container is
        not in the history, does not exist, or is not running)
    """
    # No specific container requested: report every running container
    if container_id is None:
        running = get_containers().get('running', [])
        return running if entity else [ct.short_id for ct in running]

    # The container must be known to the job history
    if not JobHistory.verify_container(container_id):
        logger.error(f"Container {container_id} not found in History.")
        return []

    # It is in the history; verify it still exists
    container = get_container(container_id)
    if not container:
        logger.warning(f"Container {container_id} not found.")
        return []

    # Only a running container counts as working
    if container.status != 'running':
        logger.warning(f"Container {container_id} is not running.")
        return []

    return [container] if entity else [container.short_id]
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
#############################
|
|
316
|
+
# Check docker
|
|
317
|
+
#############################
|
|
318
|
+
|
|
319
|
+
def is_docker_installed() -> bool:
    """
    Tell whether the docker command line client can be executed.

    Returns:
        True when 'docker --version' exits with code 0, False otherwise
    """
    try:
        # Run the 'docker --version' command
        proc = subprocess.run(['docker', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except FileNotFoundError:
        # No docker executable on PATH
        if logger.isEnabledFor(DEBUG):
            logger.debug("Docker is not installed or not found in PATH.")
        return False

    installed = proc.returncode == 0
    if logger.isEnabledFor(DEBUG):
        if installed:
            logger.debug(f"Docker is installed: {proc.stdout.strip()}")
        else:
            logger.debug(f"Docker is not installed. Error: {proc.stderr.strip()}")
    return installed
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def is_docker_daemon_running() -> bool:
    """
    Ping the Docker daemon to find out whether it is reachable.

    Returns:
        True when the ping succeeds, False on any error
    """
    try:
        docker.from_env().ping()
    except DockerAPIError as e:
        alive = False
        message = f"Docker daemon is not running: {e}"
    except Exception as ex:
        alive = False
        message = f"Error checking Docker daemon: {ex}"
    else:
        alive = True
        message = "Docker daemon is running."

    if logger.isEnabledFor(DEBUG):
        logger.debug(message)
    return alive
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
#############################
|
|
367
|
+
# Check images
|
|
368
|
+
#############################
|
|
369
|
+
|
|
370
|
+
def check_local_image(image_name: str) -> bool:
    """
    Tell whether the image is available in the local docker image store.

    Args:
        image_name: image name; ':latest' is appended when it contains no ':'
    Returns:
        True when the image is found, False when it is missing or docker
        raises an error
    """
    try:
        docker_client = docker.from_env()
        # Add ':latest' if no tag provided
        lookup_name = image_name if ":" in image_name else f"{image_name}:latest"
        docker_client.images.get(lookup_name)
    except ImageNotFound:
        return False
    except DockerException as ex:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Error checking Docker daemon: {ex}")
        return False
    return True
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def pull_docker_image(image_name, tag='latest') -> bool:
    """
    Pull a docker image.

    Args:
        image_name: image name, optionally already carrying a tag or digest
        tag: image tag, used only when ``image_name`` has no tag of its own
    Returns:
        True when the pull succeeds, False when docker reports an error
    """
    # A tag or digest can only appear in the last path component. A colon
    # earlier in the reference is a registry port ("localhost:5000/repo") and
    # must not be mistaken for a tag, otherwise `tag` is silently ignored.
    last_component = image_name.rsplit('/', 1)[-1]
    if ':' in last_component:
        full_image_name = image_name
    else:
        full_image_name = f'{image_name}:{tag}'

    # Pull the image
    user_logger.info(f'Pulling image {full_image_name} ...')
    try:
        client = docker.from_env()
        client.images.pull(f'{full_image_name}')
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Successfully pulled {full_image_name}')
        return True
    except DockerException as e:
        # DockerException also covers failures of docker.from_env() itself
        # (DockerAPIError is one of its subclasses), as in check_local_image().
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Error pulling {full_image_name}: {e}')
        return False
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
#############################
|
|
423
|
+
# Check binding/mounting
|
|
424
|
+
#############################
|
|
425
|
+
def compare_mounts(mounts1: List[Dict], mounts2: List[Dict]) -> bool:
    """
    Tell whether two mount configurations describe the same set of mounts.

    Each mount is reduced to (Type, Mode, normalized Source, normalized
    Destination); order and duplicates are ignored.

    Args:
        mounts1: container mounting configurations
        mounts2: container mounting configurations
    Returns:
        True/False
    """
    def _signature(mounts):
        # Set of tuples for an order-independent comparison
        return {
            (entry['Type'], entry['Mode'], normalize_path(entry['Source']), normalize_path(entry['Destination']))
            for entry in mounts
        }

    first = _signature(mounts1)
    second = _signature(mounts2)
    return first == second
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def compare_container_mount(container1: Union[str, Container], container2: Union[str, Container]) -> bool:
    """
    Compare the mount configurations of two containers.

    Args:
        container1: container object or id
        container2: container object or id
    Returns:
        True when both containers expose the same mounts; False when they differ
        or when a container given by id cannot be resolved
    """
    resolved = []
    for candidate in (container1, container2):
        if isinstance(candidate, str):
            container = get_container(candidate)
            if container is None:
                # get_container() returns None on a failed lookup; there is
                # nothing to compare, so the mounts cannot match.
                logger.warning(f"Container {candidate} not found.")
                return False
            candidate = container
        resolved.append(candidate)

    # Get the mount configurations
    mounts1 = resolved[0].attrs['Mounts']
    mounts2 = resolved[1].attrs['Mounts']

    return compare_mounts(mounts1, mounts2)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
#############################
|
|
471
|
+
# Check jobs
|
|
472
|
+
#############################
|
|
473
|
+
|
|
474
|
+
# Shell pipeline executed inside a container by list_running_jobs(): it prints
# the `ps` header row first, then only the process rows that mention EXPERIMENT
# or SIMULATION, with the grep processes themselves filtered out.
PS_QUERY = 'ps xao pid,ppid,pgid,etime,cmd | head -n 1 && ps xao pid,ppid,pgid,etime,cmd | grep -e EXPERIMENT -e SIMULATION | grep -v grep'
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
@dataclass(repr=False)
class Job:
    """Running Job."""
    # Identifier taken from the EXPERIMENT:/SIMULATION: token of the process
    item_id: str
    item_type: ItemType
    job_id: int
    group_pid: int
    container_id: str
    # Elapsed time string as printed by `ps`
    elapsed: str
    parent_pid: int = None

    def display(self):
        """Display Job for debugging usage."""
        emit = user_logger.info
        emit(f"Item ID: {self.item_id:15}")
        emit(f"Item Type: {self.item_type:15}")
        emit(f"Job ID: {self.job_id:15}")
        emit(f"Group PID: {self.group_pid:15}")
        emit(f"Container ID: {self.container_id:15}")
        emit(f"Elapsed: {self.elapsed:15}")
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def _parse_job_line(line: str, column_count: int, container_id: str):
    """Turn one `ps` output row into a Job, or return None when the row is not a recognizable job."""
    columns = line.split(maxsplit=column_count - 1)
    if len(columns) < 5:
        return None

    try:
        # pid, ppid and pgid are integers; etime and cmd stay strings
        pid, ppid, pgid = (int(value) for value in columns[:3])
    except ValueError:
        return None
    etime, cmd = columns[3], columns[4]

    # Find the token that starts with 'EXPERIMENT' or 'SIMULATION'; the item id follows the ':'
    tokens = [token for token in cmd.split() if token.startswith(('EXPERIMENT', 'SIMULATION'))]
    if not tokens or ':' not in tokens[0]:
        return None
    item_id = tokens[0].split(':')[1]

    # An experiment is identified by its process group, a simulation by its own pid
    is_experiment = 'EXPERIMENT' in cmd
    item_type = ItemType.EXPERIMENT if is_experiment else ItemType.SIMULATION
    job_id = pgid if is_experiment else pid

    return Job(item_id=item_id, item_type=item_type, job_id=job_id, group_pid=pgid, parent_pid=ppid,
               container_id=container_id, elapsed=etime)


def list_running_jobs(container_id: str, limit: int = None) -> List[Job]:
    """
    List all running jobs on the container.

    Runs PS_QUERY inside the container through ``docker exec`` and parses every
    process row mentioning EXPERIMENT or SIMULATION. The process exits with
    code -1 when the command fails with a return code other than 0 or 1.

    Args:
        container_id: Container ID
        limit: number of jobs to view
    Returns:
        list of running jobs
    """
    # Pass an argument vector instead of a shell string so that container_id is
    # never interpreted by the host shell.
    command = ['docker', 'exec', container_id, 'bash', '-c', PS_QUERY]
    try:
        result = subprocess.run(command, check=False, capture_output=True, text=True)
    except FileNotFoundError as ex:
        # Without a shell a missing docker executable raises instead of
        # producing a non-zero return code; keep treating it as a failed command.
        logger.error(str(ex))
        user_logger.error("Command failed: docker executable not found.")
        exit(-1)

    running_jobs = []
    if result.returncode == 0:
        processes = result.stdout.splitlines()
        if processes:
            header = processes[0].split()  # Extract the header (column names)
            for line in processes[1:]:  # Skip the first header line
                if 'EXPERIMENT' in line or 'SIMULATION' in line:
                    job = _parse_job_line(line, len(header), container_id)
                    if job is None:
                        logger.debug(f"Skip unrecognized process line: {line}")
                        continue
                    running_jobs.append(job)
    elif result.returncode == 1:
        # Return code 1 is treated as "no matching process" (grep found nothing)
        pass
    else:
        logger.error(result.stderr)
        user_logger.error(f"Command failed with return code {result.returncode}")
        exit(-1)

    # A None limit slices the whole list, so no separate branch is needed
    return running_jobs[:limit]
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def find_running_job(item_id: Union[int, str], container_id: str = None) -> Job:
    """
    Locate the running job that matches an item or job ID.

    Args:
        item_id: Experiment/Simulation ID or Running Job ID
        container_id: Container ID
    Returns:
        running Job, or None when nothing matches
    """
    if container_id:
        candidates = [container_id]
    else:
        # If the job history knows item_id, it is an Experiment ID and the
        # history tells which container ran it. Otherwise item_id is a
        # Simulation ID or Job ID and every working container is checked.
        his_job = JobHistory.get_job(item_id)
        candidates = [his_job['CONTAINER']] if his_job else get_working_containers()

    match_jobs = []
    for cid in candidates:
        # Take the first matching job of each container
        # One running container can't have multiple matches!
        hit = next(
            (job for job in list_running_jobs(cid)
             if job.item_id == item_id or str(job.job_id) == str(item_id)),
            None,
        )
        if hit is not None:
            match_jobs.append(hit)

    if not match_jobs:
        return None
    if len(match_jobs) == 1:
        return match_jobs[0]

    # item_id must be a Job ID in this case and container_id must be None!
    user_logger.error(
        f"Multiple jobs found for Job ID {item_id}, please provide the Container ID or use Entity ID instead.")
    exit(-1)
|