idmtools-platform-container 0.0.0.dev0__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. docker_image/BASE_VERSION +1 -0
  2. docker_image/Dockerfile +48 -0
  3. docker_image/Dockerfile_buildenv +46 -0
  4. docker_image/ImageName +1 -0
  5. docker_image/README.md +78 -0
  6. docker_image/__init__.py +6 -0
  7. docker_image/build_docker_image.py +145 -0
  8. docker_image/debian/BASE_VERSION +1 -0
  9. docker_image/debian/Dockerfile +40 -0
  10. docker_image/debian/ImageName +1 -0
  11. docker_image/debian/README.md +48 -0
  12. docker_image/debian/pip.conf +3 -0
  13. docker_image/debian/requirements.txt +1 -0
  14. docker_image/docker_image_history.py +101 -0
  15. docker_image/pip.conf +3 -0
  16. docker_image/push_docker_image.py +62 -0
  17. docker_image/requirements.txt +1 -0
  18. docker_image/rocky_meta_runtime.txt +37 -0
  19. idmtools_platform_container/__init__.py +18 -8
  20. idmtools_platform_container/cli/__init__.py +5 -0
  21. idmtools_platform_container/cli/container.py +682 -0
  22. idmtools_platform_container/container_operations/__init__.py +5 -0
  23. idmtools_platform_container/container_operations/docker_operations.py +593 -0
  24. idmtools_platform_container/container_platform.py +375 -0
  25. idmtools_platform_container/platform_operations/__init__.py +5 -0
  26. idmtools_platform_container/platform_operations/experiment_operations.py +112 -0
  27. idmtools_platform_container/platform_operations/simulation_operations.py +58 -0
  28. idmtools_platform_container/plugin_info.py +79 -0
  29. idmtools_platform_container/utils/__init__.py +5 -0
  30. idmtools_platform_container/utils/general.py +136 -0
  31. idmtools_platform_container/utils/status.py +130 -0
  32. idmtools_platform_container-0.0.3.dist-info/METADATA +212 -0
  33. idmtools_platform_container-0.0.3.dist-info/RECORD +69 -0
  34. idmtools_platform_container-0.0.3.dist-info/entry_points.txt +5 -0
  35. idmtools_platform_container-0.0.3.dist-info/licenses/LICENSE.TXT +3 -0
  36. {idmtools_platform_container-0.0.0.dev0.dist-info → idmtools_platform_container-0.0.3.dist-info}/top_level.txt +2 -0
  37. tests/inputs/Assets/MyLib/functions.py +2 -0
  38. tests/inputs/__init__.py +0 -0
  39. tests/inputs/model.py +28 -0
  40. tests/inputs/model1.py +31 -0
  41. tests/inputs/model3.py +21 -0
  42. tests/inputs/model_file.py +18 -0
  43. tests/inputs/run.sh +1 -0
  44. tests/inputs/sleep.py +9 -0
  45. tests/test_container_cli/__init__.py +0 -0
  46. tests/test_container_cli/helper.py +57 -0
  47. tests/test_container_cli/test_base.py +14 -0
  48. tests/test_container_cli/test_cancel.py +96 -0
  49. tests/test_container_cli/test_clear_results.py +54 -0
  50. tests/test_container_cli/test_container.py +72 -0
  51. tests/test_container_cli/test_file_container_cli.py +121 -0
  52. tests/test_container_cli/test_get_detail.py +60 -0
  53. tests/test_container_cli/test_history.py +136 -0
  54. tests/test_container_cli/test_history_count.py +53 -0
  55. tests/test_container_cli/test_inspect.py +53 -0
  56. tests/test_container_cli/test_install.py +48 -0
  57. tests/test_container_cli/test_is_running.py +69 -0
  58. tests/test_container_cli/test_jobs.py +138 -0
  59. tests/test_container_cli/test_list_containers.py +99 -0
  60. tests/test_container_cli/test_packages.py +41 -0
  61. tests/test_container_cli/test_path.py +96 -0
  62. tests/test_container_cli/test_ps.py +47 -0
  63. tests/test_container_cli/test_remove_container.py +78 -0
  64. tests/test_container_cli/test_status.py +149 -0
  65. tests/test_container_cli/test_stop_container.py +71 -0
  66. tests/test_container_cli/test_sync_history.py +98 -0
  67. tests/test_container_cli/test_verify_docker.py +28 -0
  68. tests/test_container_cli/test_volume.py +28 -0
  69. idmtools_platform_container-0.0.0.dev0.dist-info/METADATA +0 -41
  70. idmtools_platform_container-0.0.0.dev0.dist-info/RECORD +0 -5
  71. {idmtools_platform_container-0.0.0.dev0.dist-info → idmtools_platform_container-0.0.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,593 @@
1
+ """
2
+ Here we implement the ContainerPlatform docker operations.
3
+
4
+ Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
5
+ """
6
+ import docker
7
+ import platform as sys_platform
8
+ import subprocess
9
+ from dataclasses import dataclass, field
10
+ from typing import List, Dict, NoReturn, Any, Union
11
+ from idmtools.core import ItemType
12
+ from idmtools_platform_container.utils.general import normalize_path, parse_iso8601
13
+ from idmtools_platform_file.tools.job_history import JobHistory
14
+ from docker.models.containers import Container
15
+ from docker.errors import NotFound as ErrorNotFound, ImageNotFound, DockerException
16
+ from docker.errors import APIError as DockerAPIError
17
+ from logging import getLogger, DEBUG
18
+
19
# Module logger for diagnostics; the 'user' logger carries user-facing messages.
logger = getLogger(__name__)
user_logger = getLogger('user')

# Only consider the containers that can be restarted
CONTAINER_STATUS = ['exited', 'running', 'paused']
24
+
25
+
26
def validate_container_running(platform, **kwargs) -> str:
    """
    Make sure the docker image is available, then reuse a matching container or start a new one.

    The image is pulled when it is not found locally. Depending on the platform
    settings (force_start, new_container, include_stopped, container_prefix) an
    existing running container is reused, a stopped one is restarted, or a new
    container is started through platform.start_container().
    Args:
        platform: Platform
        kwargs: keyword arguments used to expand functionality
    Returns:
        container id
    """
    # Check image exists
    if not check_local_image(platform.docker_image):
        user_logger.info(f"Image {platform.docker_image} does not exist, pull the image first.")
        succeeded = pull_docker_image(platform.docker_image)
        if not succeeded:
            user_logger.error(f"/!\\ ERROR: Failed to pull image {platform.docker_image}.")
            exit(-1)

    # User configuration
    if logger.isEnabledFor(DEBUG):
        logger.debug(f"User config: force_start={platform.force_start}")
        logger.debug(f"User config: new_container={platform.new_container}")
        logger.debug(f"User config: include_stopped={platform.include_stopped}")

    # Check containers
    container_id = None
    container_match = platform.retrieve_match_containers()
    container_running = [container for status, container in container_match if status == 'running']
    container_stopped = [container for status, container in container_match if status != 'running']

    if logger.isEnabledFor(DEBUG):
        logger.debug(f"Found running matched containers: {container_running}")
        if platform.include_stopped:
            logger.debug(f"Found stopped matched containers: {container_stopped}")

    if platform.force_start:
        # force_start discards every matching container so that a fresh one gets started below
        if logger.isEnabledFor(DEBUG) and len(container_running) > 0:
            logger.debug(f"Stop all running containers {container_running}")
        stop_all_containers(container_running, keep_running=False)
        container_running = []

        if logger.isEnabledFor(DEBUG) and len(container_stopped) > 0 and platform.include_stopped:
            logger.debug(f"Stop all stopped containers {container_stopped}")
        stop_all_containers(container_stopped, keep_running=False)
        container_stopped = []

    if not platform.new_container and platform.container_prefix is None:
        if len(container_running) > 0:
            # Pick up the first running container
            container_running = sort_containers_by_start(container_running)
            container_id = container_running[0].short_id
            container = get_container(container_id)
            if container is None:
                # get_container() returns None when the lookup fails (e.g. the container
                # vanished after it was listed); fall through and start a new container.
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f"Existing container {container_id} is no longer available")
                container_id = None
            elif sys_platform.system() not in ["Windows"]:
                # Inside the container: listing the data mount must yield at least 3 lines,
                # otherwise the container is treated as unusable and removed.
                command = f"bash -c '[ \"$(ls -lart {platform.data_mount} | wc -l)\" -ge 3 ] && echo exists || echo not_exists'"
                result = container.exec_run(command)
                output = result.output.decode().strip()
                if output == "not_exists":
                    stop_container(container_id, remove=True)
                    if logger.isEnabledFor(DEBUG):
                        logger.debug(f"Existing container {container_id} is not usable")
                    container_id = None

            if container_id is not None:
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f"Pick running container {container_id}.")
        elif len(container_stopped) > 0:
            # Pick up the first stopped container and then restart it
            container_stopped = sort_containers_by_start(container_stopped)
            container = container_stopped[0]
            container.restart()
            container_id = container.short_id
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Pick and restart the stopped container {container.short_id}.")

    # Start the container
    if container_id is None:
        container_id = platform.start_container(**kwargs)
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Start container: {platform.docker_image}")
            logger.debug(f"New container ID: {container_id}")

    return container_id
107
+
108
+
109
+ #############################
110
+ # Check containers
111
+ #############################
112
+
113
def get_container(container_id) -> Any:
    """
    Look up a container object through the docker client.

    Lookup failures are logged at debug level and reported as None instead of
    being raised.
    Args:
        container_id: container id
    Returns:
        container object, or None when it is not found or the docker API reports an error
    """
    docker_client = docker.from_env()

    try:
        found = docker_client.containers.get(container_id)
    except ErrorNotFound:
        logger.debug(f"Container with ID {container_id} not found.")
        return None
    except DockerAPIError as e:
        logger.debug(f"Error retrieving container with ID {container_id}: {str(e)}")
        return None
    else:
        return found
133
+
134
+
135
def find_container_by_image(image: str, include_stopped: bool = False) -> Dict:
    """
    Filter the containers returned by get_containers() down to those created from an image.

    Args:
        image: docker image, compared for equality with the container's Config.Image attribute
        include_stopped: bool, if consider the stopped containers or not
    Returns:
        dict mapping each status group to the list of matching containers
    """
    def uses_image(ct) -> bool:
        return image == ct.attrs['Config']['Image']

    return {
        status: [ct for ct in group if uses_image(ct)]
        for status, group in get_containers(include_stopped).items()
    }
150
+
151
+
152
def stop_container(container: Union[str, Container], remove: bool = True) -> NoReturn:
    """
    Stop a container and optionally remove it.

    A container id is resolved with get_container() first. When the container
    cannot be found, or the docker API reports an error, the problem is logged
    at debug level and the process exits with -1.
    Args:
        container: container id or container object to be stopped
        remove: bool, if remove the container or not
    Returns:
        No return
    Raises:
        TypeError: if container is neither a string nor a Container
    """
    try:
        if isinstance(container, str):
            requested_id = container
            container = get_container(requested_id)
            if container is None:
                # get_container() reports a failed lookup as None rather than raising,
                # so handle it here instead of failing on container.status below.
                logger.debug(f"Container with ID {requested_id} not found.")
                exit(-1)
        elif not isinstance(container, Container):
            raise TypeError("Invalid container object.")

        # Stop the container
        if container.status == 'running':
            container.stop()
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Container {str(container)} has been stopped.")

        if remove:
            container.remove()
            if logger.isEnabledFor(DEBUG):
                logger.debug(f"Container {str(container)} has been removed.")
    except ErrorNotFound:
        # container is always a resolved Container object at this point
        logger.debug(f"Container {container.short_id} not found.")
        exit(-1)
    except DockerAPIError as e:
        logger.debug(f"Error stopping container {str(container)}: {str(e)}")
        exit(-1)
186
+
187
+
188
def stop_all_containers(containers: List[Union[str, Container]], keep_running: bool = True,
                        remove: bool = True) -> NoReturn:
    """
    Stop all containers.

    Container ids are resolved with get_container() before use; ids that cannot
    be resolved are skipped. With keep_running=True a running container that
    still has running jobs is left untouched.
    Args:
        containers: list of container id or containers to be stopped
        keep_running: bool, if keep the running containers or not
        remove: bool, if remove the container or not
    Returns:
        No return
    """
    for container in containers:
        if isinstance(container, str):
            # The signature allows ids, but .status/.short_id below need a container object
            resolved = get_container(container)
            if resolved is None:
                if logger.isEnabledFor(DEBUG):
                    logger.debug(f"Container with ID {container} not found.")
                continue
            container = resolved

        if container.status == 'running' and keep_running:
            jobs = list_running_jobs(container.short_id)
            if jobs:
                continue
        stop_container(container, remove=remove)
205
+
206
+
207
def restart_container(container: Union[str, Container]) -> NoReturn:
    """
    Restart a container.

    A container id is resolved with get_container() first. Any failure is
    reported through the user logger and the process exits with -1.
    Args:
        container: container id or container object to be restarted
    Returns:
        No return
    """
    # Label used in messages; safe to build even when the argument is not a Container,
    # so the error handlers below can never fail on a missing short_id attribute.
    label = container if isinstance(container, str) else getattr(container, 'short_id', repr(container))
    try:
        if isinstance(container, str):
            container = get_container(container)
        elif not isinstance(container, Container):
            raise TypeError("Invalid container object.")

        if container is None:
            # Report the id that was asked for, not the None returned by the lookup
            user_logger.error(f"Container {label} not found.")
            exit(-1)

        label = container.short_id

        # Restart the container
        container.restart()
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Container {label} has been restarted.")
    except DockerAPIError as e:
        user_logger.error(f"Error restarting container {label}: {str(e)}")
        exit(-1)
    except Exception as e:
        user_logger.error(f"Restarting container {label} encounters an unexpected error: {e}")
        exit(-1)
235
+
236
+
237
def sort_containers_by_start(containers: List[Container], reverse: bool = True) -> List[Container]:
    """
    Order containers by the 'StartedAt' timestamp of their state.

    Args:
        containers: list of containers
        reverse: bool, if sort in reverse order (the default puts the latest start first)
    Returns:
        new sorted list of containers; the input list is not modified
    """
    def started_at(ct):
        return parse_iso8601(ct.attrs['State']['StartedAt'])

    ordered = list(containers)
    ordered.sort(key=started_at, reverse=reverse)
    return ordered
254
+
255
+
256
def get_containers(include_stopped: bool = False) -> Dict:
    """
    Collect the containers known to the job history, grouped into running and stopped.

    Only containers whose status is listed in CONTAINER_STATUS and that pass
    JobHistory.verify_container() are kept.
    Args:
        include_stopped: bool, if consider the stopped containers or not
    Returns:
        dict with the keys 'running' and 'stopped', each holding a list of containers
    """
    docker_client = docker.from_env()
    running, stopped = [], []

    for ct in docker_client.containers.list(all=include_stopped):
        # Skip containers that cannot be restarted or are unknown to the history
        if ct.status not in CONTAINER_STATUS or not JobHistory.verify_container(ct.short_id):
            continue
        # Everything that is not running counts as stopped
        if ct.status == 'running':
            running.append(ct)
        else:
            stopped.append(ct)

    return {'running': running, 'stopped': stopped}
276
+
277
+
278
def get_working_containers(container_id: str = None, entity: bool = False) -> List[Any]:
    """
    Return the running containers, optionally restricted to a single container.

    Without a container id, all running containers from get_containers() are
    returned. With a container id, the result holds that container only if it
    is verified by the job history, still exists and is running; otherwise the
    result is empty.
    Args:
        container_id: Container ID
        entity: bool, if return the container object or container id
    Returns:
        list of working containers or IDs
    """
    if container_id is None:
        running = get_containers().get('running', [])
        return running if entity else [c.short_id for c in running]

    # The container must be known to the history ...
    if not JobHistory.verify_container(container_id):
        logger.error(f"Container {container_id} not found in History.")
        return []

    # ... must still exist ...
    container = get_container(container_id)
    if not container:
        logger.warning(f"Container {container_id} not found.")
        return []

    # ... and must be running
    if container.status != 'running':
        logger.warning(f"Container {container_id} is not running.")
        return []

    return [container] if entity else [container.short_id]
313
+
314
+
315
+ #############################
316
+ # Check docker
317
+ #############################
318
+
319
def is_docker_installed() -> bool:
    """
    Check if Docker is installed by running 'docker --version'.

    Returns:
        True when the command exits with code 0; False when it fails or the
        docker executable cannot be found
    """
    try:
        probe = subprocess.run(['docker', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    except FileNotFoundError:
        # No docker executable on PATH
        if logger.isEnabledFor(DEBUG):
            logger.debug("Docker is not installed or not found in PATH.")
        return False

    installed = probe.returncode == 0
    if logger.isEnabledFor(DEBUG):
        if installed:
            logger.debug(f"Docker is installed: {probe.stdout.strip()}")
        else:
            logger.debug(f"Docker is not installed. Error: {probe.stderr.strip()}")
    return installed
342
+
343
+
344
def is_docker_daemon_running() -> bool:
    """
    Check if the Docker daemon answers a ping.

    Returns:
        True when the ping succeeds, False on any error
    """
    try:
        docker.from_env().ping()
    except DockerAPIError as e:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Docker daemon is not running: {e}")
        return False
    except Exception as ex:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Error checking Docker daemon: {ex}")
        return False
    else:
        if logger.isEnabledFor(DEBUG):
            logger.debug("Docker daemon is running.")
        return True
364
+
365
+
366
+ #############################
367
+ # Check images
368
+ #############################
369
+
370
def check_local_image(image_name: str) -> bool:
    """
    Check if the image exists locally.

    Args:
        image_name: image name; ':latest' is appended when the name contains no ':'
    Returns:
        True when the image is found, False when it is missing or docker reports an error
    """
    try:
        docker_client = docker.from_env()
        # Add ':latest' if no tag provided
        lookup_name = image_name if ":" in image_name else f"{image_name}:latest"
        docker_client.images.get(lookup_name)
    except ImageNotFound:
        return False
    except DockerException as ex:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f"Error checking Docker daemon: {ex}")
        return False
    else:
        return True
391
+
392
+
393
def pull_docker_image(image_name, tag='latest') -> bool:
    """
    Pull a docker image through the docker client.

    Args:
        image_name: image name
        tag: image tag, only used when image_name does not already contain a ':'
    Returns:
        True when the pull succeeds, False when the docker API reports an error
    """
    # Use the name as given when it already carries a tag
    full_image_name = image_name if ':' in image_name else f'{image_name}:{tag}'

    # Pull the image
    user_logger.info(f'Pulling image {full_image_name} ...')
    try:
        docker_client = docker.from_env()
        docker_client.images.pull(full_image_name)
    except DockerAPIError as e:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Error pulling {full_image_name}: {e}')
        return False
    else:
        if logger.isEnabledFor(DEBUG):
            logger.debug(f'Successfully pulled {full_image_name}')
        return True
420
+
421
+
422
+ #############################
423
+ # Check binding/mounting
424
+ #############################
425
def compare_mounts(mounts1: List[Dict], mounts2: List[Dict]) -> bool:
    """
    Compare two sets of mount configurations, ignoring order and duplicates.

    Each mount is reduced to its Type, Mode and the normalized Source and
    Destination paths before comparison.
    Args:
        mounts1: container mounting configurations
        mounts2: container mounting configurations
    Returns:
        True/False
    """
    def as_key_set(mounts: List[Dict]) -> set:
        # Tuples are hashable, so the mounts can be compared as sets
        return {
            (m['Type'], m['Mode'], normalize_path(m['Source']), normalize_path(m['Destination']))
            for m in mounts
        }

    return as_key_set(mounts1) == as_key_set(mounts2)
445
+
446
+
447
def compare_container_mount(container1: Union[str, Container], container2: Union[str, Container]) -> bool:
    """
    Compare the mount configurations of two containers.

    Args:
        container1: container object or id
        container2: container object or id
    Returns:
        True/False
    """
    # Resolve ids to container objects; objects are used as they are
    first, second = [
        get_container(ct) if isinstance(ct, str) else ct
        for ct in (container1, container2)
    ]

    # Compare the 'Mounts' attribute of both containers
    first_mounts = first.attrs['Mounts']
    second_mounts = second.attrs['Mounts']
    return compare_mounts(first_mounts, second_mounts)
468
+
469
+
470
+ #############################
471
+ # Check jobs
472
+ #############################
473
+
474
# Shell pipeline run inside a container: print the ps header row first, then only the
# process rows that mention EXPERIMENT or SIMULATION (excluding the grep itself).
PS_QUERY = (
    'ps xao pid,ppid,pgid,etime,cmd | head -n 1'
    ' && ps xao pid,ppid,pgid,etime,cmd | grep -e EXPERIMENT -e SIMULATION | grep -v grep'
)
475
+
476
+
477
@dataclass(repr=False)
class Job:
    """Running Job, as built by list_running_jobs() from one row of ps output."""
    # ID taken from the EXPERIMENT:<id> / SIMULATION:<id> token of the command line
    item_id: str = field(init=True)
    # ItemType.EXPERIMENT or ItemType.SIMULATION
    item_type: ItemType = field(init=True)
    # Process group id for experiments, process id for simulations
    job_id: int = field(init=True)
    group_pid: int = field(init=True)
    container_id: str = field(init=True)
    # Elapsed time exactly as printed by ps (etime column)
    elapsed: str = field(init=True)
    # Optional: defaults to None, so the annotation must allow it
    parent_pid: Union[int, None] = field(default=None, init=True)

    def display(self):
        """Display Job for debugging usage (parent_pid is not shown)."""
        user_logger.info(f"Item ID: {self.item_id:15}")
        user_logger.info(f"Item Type: {self.item_type:15}")
        user_logger.info(f"Job ID: {self.job_id:15}")
        user_logger.info(f"Group PID: {self.group_pid:15}")
        user_logger.info(f"Container ID: {self.container_id:15}")
        user_logger.info(f"Elapsed: {self.elapsed:15}")
496
+
497
+
498
def _parse_job_line(line: str, column_count: int, container_id: str) -> Union["Job", None]:
    """Build a Job from one row of the PS_QUERY output, or return None if the row is not a job."""
    if 'EXPERIMENT' not in line and 'SIMULATION' not in line:
        return None

    # Split into as many columns as the header has; the last column (cmd) keeps its spaces
    columns = line.split(maxsplit=column_count - 1)
    if len(columns) < 5:
        return None
    try:
        pid, ppid, pgid = (int(value) for value in columns[:3])
    except ValueError:
        return None
    etime, cmd = columns[3], columns[4]

    # Find the token that starts with 'EXPERIMENT' or 'SIMULATION'; it carries the item id
    markers = [token for token in cmd.split() if token.startswith(('EXPERIMENT', 'SIMULATION'))]
    if not markers or ':' not in markers[0]:
        return None
    item_id = markers[0].split(':')[1]

    # Determine the item type and job ID
    is_experiment = 'EXPERIMENT' in cmd
    item_type = ItemType.EXPERIMENT if is_experiment else ItemType.SIMULATION
    job_id = pgid if is_experiment else pid

    return Job(item_id=item_id, item_type=item_type, job_id=job_id, group_pid=pgid, parent_pid=ppid,
               container_id=container_id, elapsed=etime)


def list_running_jobs(container_id: str, limit: int = None) -> List[Job]:
    """
    List all running jobs on the container.

    Runs PS_QUERY inside the container through 'docker exec' and turns every
    EXPERIMENT/SIMULATION row into a Job. Rows that cannot be parsed are
    skipped. A return code of 1 is treated as "no jobs"; any other non-zero
    return code is logged and the process exits with -1.
    Args:
        container_id: Container ID
        limit: number of jobs to view
    Returns:
        list of running jobs
    """
    # Argument list without a local shell, so container_id is never interpreted by a shell
    command = ['docker', 'exec', container_id, 'bash', '-c', f'({PS_QUERY})']
    try:
        result = subprocess.run(command, check=False, capture_output=True, text=True)
    except FileNotFoundError as ex:
        # Without a shell a missing docker executable raises instead of yielding a return code
        logger.error(str(ex))
        user_logger.error("Command failed: docker executable not found.")
        exit(-1)

    running_jobs = []
    if result.returncode == 0:
        processes = result.stdout.splitlines()
        # The first row is the ps header; its width tells how many columns to split into
        column_count = len(processes[0].split()) if processes else 0
        for line in processes[1:]:  # Skip the first header line
            job = _parse_job_line(line, column_count, container_id)
            if job is not None:
                running_jobs.append(job)
            elif logger.isEnabledFor(DEBUG):
                logger.debug(f"Skip unrecognized process line: {line}")
    elif result.returncode == 1:
        # Treated as "nothing matched": no running jobs
        pass
    else:
        logger.error(result.stderr)
        user_logger.error(f"Command failed with return code {result.returncode}")
        exit(-1)

    # Slicing with None returns the full list
    return running_jobs[:limit]
548
+
549
+
550
def find_running_job(item_id: Union[int, str], container_id: str = None) -> Job:
    """
    Look for the running job that belongs to an item.

    Without a container id, the job history decides which container to search;
    when the history has no entry for the item, all working containers are
    searched. If more than one container reports a match, an error is logged
    and the process exits with -1.
    Args:
        item_id: Experiment/Simulation ID or Running Job ID
        container_id: Container ID
    Returns:
        running Job, or None when no job matches
    """
    if container_id:
        search_in = [container_id]
    else:
        # An entry in the history means item_id is an Experiment ID; otherwise it is a
        # Simulation ID or Job ID and every working container has to be checked
        history_entry = JobHistory.get_job(item_id)
        search_in = [history_entry['CONTAINER']] if history_entry else get_working_containers()

    def is_wanted(job) -> bool:
        return job.item_id == item_id or str(job.job_id) == str(item_id)

    found = []
    for cid in search_in:
        # Only the first match per container counts: one running container can't have multiple matches
        first_hit = next((job for job in list_running_jobs(cid) if is_wanted(job)), None)
        if first_hit is not None:
            found.append(first_hit)

    if len(found) > 1:
        # item_id must be a Job ID in this case and container_id must be None!
        user_logger.error(
            f"Multiple jobs found for Job ID {item_id}, please provide the Container ID or use Entity ID instead.")
        exit(-1)

    return found[0] if found else None