aws-parallelcluster-node 3.5.1__tar.gz → 3.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aws-parallelcluster-node-3.5.1/src/aws_parallelcluster_node.egg-info → aws-parallelcluster-node-3.6.0}/PKG-INFO +1 -1
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/setup.py +1 -1
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0/src/aws_parallelcluster_node.egg-info}/PKG-INFO +1 -1
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/aws_parallelcluster_node.egg-info/SOURCES.txt +2 -0
- aws-parallelcluster-node-3.6.0/src/common/ec2_utils.py +29 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/common/schedulers/slurm_commands.py +8 -4
- aws-parallelcluster-node-3.6.0/src/slurm_plugin/cluster_event_publisher.py +777 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/clustermgtd.py +31 -5
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/console_logger.py +12 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/fleet_manager.py +2 -1
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/fleet_status_manager.py +1 -1
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/instance_manager.py +2 -1
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/logging/parallelcluster_clustermgtd_logging.conf +18 -3
- aws-parallelcluster-node-3.6.0/src/slurm_plugin/logging/parallelcluster_resume_logging.conf +36 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/resume.py +19 -3
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/slurm_resources.py +102 -10
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/task_executor.py +12 -0
- aws-parallelcluster-node-3.5.1/src/slurm_plugin/logging/parallelcluster_resume_logging.conf +0 -21
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/LICENSE.txt +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/NOTICE.txt +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/README.md +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/setup.cfg +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/aws_parallelcluster_node.egg-info/dependency_links.txt +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/aws_parallelcluster_node.egg-info/entry_points.txt +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/aws_parallelcluster_node.egg-info/not-zip-safe +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/aws_parallelcluster_node.egg-info/requires.txt +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/aws_parallelcluster_node.egg-info/top_level.txt +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/common/__init__.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/common/schedulers/__init__.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/common/time_utils.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/common/utils.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/__init__.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/common.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/computemgtd.py +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/logging/parallelcluster_computemgtd_logging.conf +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/logging/parallelcluster_fleet_status_manager_logging.conf +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/logging/parallelcluster_suspend_logging.conf +0 -0
- {aws-parallelcluster-node-3.5.1 → aws-parallelcluster-node-3.6.0}/src/slurm_plugin/suspend.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: aws-parallelcluster-node
|
|
3
|
-
Version: 3.5.1
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: aws-parallelcluster-node provides the scripts for an AWS ParallelCluster node.
|
|
5
5
|
Home-page: https://github.com/aws/aws-parallelcluster-node
|
|
6
6
|
Author: Amazon Web Services
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: aws-parallelcluster-node
|
|
3
|
-
Version: 3.5.1
|
|
3
|
+
Version: 3.6.0
|
|
4
4
|
Summary: aws-parallelcluster-node provides the scripts for an AWS ParallelCluster node.
|
|
5
5
|
Home-page: https://github.com/aws/aws-parallelcluster-node
|
|
6
6
|
Author: Amazon Web Services
|
|
@@ -10,11 +10,13 @@ src/aws_parallelcluster_node.egg-info/not-zip-safe
|
|
|
10
10
|
src/aws_parallelcluster_node.egg-info/requires.txt
|
|
11
11
|
src/aws_parallelcluster_node.egg-info/top_level.txt
|
|
12
12
|
src/common/__init__.py
|
|
13
|
+
src/common/ec2_utils.py
|
|
13
14
|
src/common/time_utils.py
|
|
14
15
|
src/common/utils.py
|
|
15
16
|
src/common/schedulers/__init__.py
|
|
16
17
|
src/common/schedulers/slurm_commands.py
|
|
17
18
|
src/slurm_plugin/__init__.py
|
|
19
|
+
src/slurm_plugin/cluster_event_publisher.py
|
|
18
20
|
src/slurm_plugin/clustermgtd.py
|
|
19
21
|
src/slurm_plugin/common.py
|
|
20
22
|
src/slurm_plugin/computemgtd.py
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License").
|
|
4
|
+
# You may not use this file except in compliance with the License.
|
|
5
|
+
# A copy of the License is located at
|
|
6
|
+
#
|
|
7
|
+
# http://aws.amazon.com/apache2.0/
|
|
8
|
+
#
|
|
9
|
+
# or in the "LICENSE.txt" file accompanying this file.
|
|
10
|
+
# This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and limitations under the License.
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_private_ip_address(instance_info):
|
|
15
|
+
"""
|
|
16
|
+
Return the PrivateIpAddress of the EC2 instance.
|
|
17
|
+
|
|
18
|
+
The PrivateIpAddress is considered to be the one for the
|
|
19
|
+
network interface with DeviceIndex = NetworkCardIndex = 0.
|
|
20
|
+
:param instance_info: the dictionary returned by a EC2:DescribeInstances call.
|
|
21
|
+
:return: the PrivateIpAddress of the instance.
|
|
22
|
+
"""
|
|
23
|
+
private_ip = instance_info["PrivateIpAddress"]
|
|
24
|
+
for network_interface in instance_info["NetworkInterfaces"]:
|
|
25
|
+
attachment = network_interface["Attachment"]
|
|
26
|
+
if attachment["DeviceIndex"] == 0 and attachment["NetworkCardIndex"] == 0:
|
|
27
|
+
private_ip = network_interface["PrivateIpAddress"]
|
|
28
|
+
break
|
|
29
|
+
return private_ip
|
|
@@ -61,7 +61,7 @@ SINFO = f"{SLURM_BINARIES_DIR}/sinfo"
|
|
|
61
61
|
SCONTROL_OUTPUT_AWK_PARSER = (
|
|
62
62
|
'awk \'BEGIN{{RS="\\n\\n" ; ORS="######\\n";}} {{print}}\' | '
|
|
63
63
|
+ "grep -oP '^(NodeName=\\S+)|(NodeAddr=\\S+)|(NodeHostName=\\S+)|(?<!Next)(State=\\S+)|"
|
|
64
|
-
+ "(Partitions=\\S+)|(SlurmdStartTime=\\S+)|(Reason=.*)|(######)'"
|
|
64
|
+
+ "(Partitions=\\S+)|(SlurmdStartTime=\\S+)|(LastBusyTime=\\S+)|(Reason=.*)|(######)'"
|
|
65
65
|
)
|
|
66
66
|
|
|
67
67
|
# Set default timeouts for running different slurm commands.
|
|
@@ -333,7 +333,8 @@ def _parse_nodes_info(slurm_node_info: str) -> List[SlurmNode]:
|
|
|
333
333
|
"""Parse slurm node info into SlurmNode objects."""
|
|
334
334
|
# [ec2-user@ip-10-0-0-58 ~]$ /opt/slurm/bin/scontrol show nodes compute-dy-c5xlarge-[1-3],compute-dy-c5xlarge-50001\
|
|
335
335
|
# | awk 'BEGIN{{RS="\n\n" ; ORS="######\n";}} {{print}}' | grep -oP "^(NodeName=\S+)|(NodeAddr=\S+)
|
|
336
|
-
# |(NodeHostName=\S+)|(?<!Next)(State=\S+)|(Partitions=\S+)|(SlurmdStartTime=\S+)|(Reason=.*)|(######)"
|
|
336
|
+
# |(NodeHostName=\S+)|(?<!Next)(State=\S+)|(Partitions=\S+)|(SlurmdStartTime=\S+)|(LastBusyTime=\\S+)|(Reason=.*)\
|
|
337
|
+
# |(######)"
|
|
337
338
|
# NodeName=compute-dy-c5xlarge-1
|
|
338
339
|
# NodeAddr=1.2.3.4
|
|
339
340
|
# NodeHostName=compute-dy-c5xlarge-1
|
|
@@ -372,8 +373,11 @@ def _parse_nodes_info(slurm_node_info: str) -> List[SlurmNode]:
|
|
|
372
373
|
"Partitions": "partitions",
|
|
373
374
|
"Reason": "reason",
|
|
374
375
|
"SlurmdStartTime": "slurmdstarttime",
|
|
376
|
+
"LastBusyTime": "lastbusytime",
|
|
375
377
|
}
|
|
376
378
|
|
|
379
|
+
date_fields = ["SlurmdStartTime", "LastBusyTime"]
|
|
380
|
+
|
|
377
381
|
node_info = slurm_node_info.split("######\n")
|
|
378
382
|
slurm_nodes = []
|
|
379
383
|
for node in node_info:
|
|
@@ -381,8 +385,8 @@ def _parse_nodes_info(slurm_node_info: str) -> List[SlurmNode]:
|
|
|
381
385
|
kwargs = {}
|
|
382
386
|
for line in lines:
|
|
383
387
|
key, value = line.split("=")
|
|
384
|
-
if key == "SlurmdStartTime":
|
|
385
|
-
if value != "None":
|
|
388
|
+
if key in date_fields:
|
|
389
|
+
if value not in ["None", "Unknown"]:
|
|
386
390
|
value = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S").astimezone(tz=timezone.utc)
|
|
387
391
|
else:
|
|
388
392
|
value = None
|