sagemaker-core 1.0.48__py3-none-any.whl → 1.0.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of sagemaker-core might be problematic.

@@ -3223,6 +3223,7 @@ class Cluster(Base):
  vpc_config:
  orchestrator: The type of orchestrator used for the SageMaker HyperPod cluster.
  node_recovery: The node recovery mode configured for the SageMaker HyperPod cluster.
+ node_provisioning_mode: The mode used for provisioning nodes in the cluster.

  """

@@ -3238,6 +3239,7 @@ class Cluster(Base):
  vpc_config: Optional[shapes.VpcConfig] = Unassigned()
  orchestrator: Optional[shapes.ClusterOrchestrator] = Unassigned()
  node_recovery: Optional[str] = Unassigned()
+ node_provisioning_mode: Optional[str] = Unassigned()

  def get_name(self) -> str:
  attributes = vars(self)
@@ -3287,6 +3289,7 @@ class Cluster(Base):
  tags: Optional[List[shapes.Tag]] = Unassigned(),
  orchestrator: Optional[shapes.ClusterOrchestrator] = Unassigned(),
  node_recovery: Optional[str] = Unassigned(),
+ node_provisioning_mode: Optional[str] = Unassigned(),
  session: Optional[Session] = None,
  region: Optional[str] = None,
  ) -> Optional["Cluster"]:
@@ -3299,8 +3302,9 @@ class Cluster(Base):
  restricted_instance_groups: The specialized instance groups for training models like Amazon Nova to be created in the SageMaker HyperPod cluster.
  vpc_config: Specifies the Amazon Virtual Private Cloud (VPC) that is associated with the Amazon SageMaker HyperPod cluster. You can control access to and from your resources by configuring your VPC. For more information, see Give SageMaker access to resources in your Amazon VPC. When your Amazon VPC and subnets support IPv6, network communications differ based on the cluster orchestration platform: Slurm-orchestrated clusters automatically configure nodes with dual IPv6 and IPv4 addresses, allowing immediate IPv6 network communications. In Amazon EKS-orchestrated clusters, nodes receive dual-stack addressing, but pods can only use IPv6 when the Amazon EKS cluster is explicitly IPv6-enabled. For information about deploying an IPv6 Amazon EKS cluster, see Amazon EKS IPv6 Cluster Deployment. Additional resources for IPv6 configuration: For information about adding IPv6 support to your VPC, see IPv6 Support for VPC. For information about creating a new IPv6-compatible VPC, see Amazon VPC Creation Guide. To configure SageMaker HyperPod with a custom Amazon VPC, see Custom Amazon VPC Setup for SageMaker HyperPod.
  tags: Custom tags for managing the SageMaker HyperPod cluster as an Amazon Web Services resource. You can add tags to your cluster in the same way you add them in other Amazon Web Services services that support tagging. To learn more about tagging Amazon Web Services resources in general, see Tagging Amazon Web Services Resources User Guide.
- orchestrator: The type of orchestrator to use for the SageMaker HyperPod cluster. Currently, the only supported value is "eks", which is to use an Amazon Elastic Kubernetes Service (EKS) cluster as the orchestrator.
+ orchestrator: The type of orchestrator to use for the SageMaker HyperPod cluster. Currently, the only supported value is "eks", which is to use an Amazon Elastic Kubernetes Service cluster as the orchestrator.
  node_recovery: The node recovery mode for the SageMaker HyperPod cluster. When set to Automatic, SageMaker HyperPod will automatically reboot or replace faulty nodes when issues are detected. When set to None, cluster administrators will need to manually manage any faulty cluster instances.
+ node_provisioning_mode: The mode for provisioning nodes in the cluster. You can specify the following modes: Continuous: Scaling behavior that enables 1) concurrent operation execution within instance groups, 2) continuous retry mechanisms for failed operations, 3) enhanced customer visibility into cluster events through detailed event streams, and 4) partial provisioning capabilities. Your clusters and instance groups remain InService while scaling. This mode is only supported for EKS-orchestrated clusters.
  session: Boto3 session.
  region: Region name.

@@ -3337,6 +3341,7 @@ class Cluster(Base):
  "Tags": tags,
  "Orchestrator": orchestrator,
  "NodeRecovery": node_recovery,
+ "NodeProvisioningMode": node_provisioning_mode,
  }

  operation_input_args = Base.populate_chained_attributes(
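
Taken together, the hunks above thread NodeProvisioningMode end to end: docstring, class attribute, create() signature, and request payload. Below is a minimal sketch of passing the new argument. It assumes the package's main.resources/main.shapes import paths and the snake_cased field names of the CreateCluster API model; cluster_name and instance_groups are not visible in this diff, and every name and ARN is a placeholder.

    from sagemaker_core.main.resources import Cluster
    from sagemaker_core.main.shapes import (
        ClusterInstanceGroupSpecification,
        ClusterLifeCycleConfig,
        ClusterOrchestrator,
        ClusterOrchestratorEksConfig,
    )

    # Placeholder identifiers -- substitute your own resources.
    eks_arn = "arn:aws:eks:us-west-2:111122223333:cluster/demo"
    role_arn = "arn:aws:iam::111122223333:role/HyperPodExecutionRole"

    cluster = Cluster.create(
        cluster_name="demo-hyperpod",          # assumed parameter, not shown in this diff
        instance_groups=[
            ClusterInstanceGroupSpecification(  # field names assumed from the API model
                instance_group_name="workers",
                instance_type="ml.g5.8xlarge",
                instance_count=2,
                life_cycle_config=ClusterLifeCycleConfig(
                    source_s3_uri="s3://my-bucket/lifecycle/",
                    on_create="on_create.sh",
                ),
                execution_role=role_arn,
            )
        ],
        orchestrator=ClusterOrchestrator(
            eks=ClusterOrchestratorEksConfig(cluster_arn=eks_arn)
        ),
        node_recovery="Automatic",
        node_provisioning_mode="Continuous",   # new in 1.0.50; EKS-orchestrated clusters only
    )
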
@@ -3731,6 +3736,7 @@ class Cluster(Base):
  def get_node(
  self,
  node_id: Optional[str] = Unassigned(),
+ node_logical_id: Optional[str] = Unassigned(),
  session: Optional[Session] = None,
  region: Optional[str] = None,
  ) -> Optional[shapes.ClusterNodeDetails]:
@@ -3739,6 +3745,7 @@ class Cluster(Base):

  Parameters:
  node_id: The ID of the SageMaker HyperPod cluster node.
+ node_logical_id: The logical identifier of the node to describe. You can specify either NodeLogicalId or InstanceId, but not both. NodeLogicalId can be used to describe nodes that are still being provisioned and don't yet have an InstanceId assigned.
  session: Boto3 session.
  region: Region name.

@@ -3761,6 +3768,7 @@ class Cluster(Base):
  operation_input_args = {
  "ClusterName": self.cluster_name,
  "NodeId": node_id,
+ "NodeLogicalId": node_logical_id,
  }
  # serialize the input request
  operation_input_args = serialize(operation_input_args)
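
With NodeLogicalId wired into the DescribeClusterNode call, a still-provisioning node can be described before it has an InstanceId. A minimal sketch, assuming the same import path and an existing Cluster.get helper; the IDs are placeholders, and node_id and node_logical_id must not both be passed.

    from sagemaker_core.main.resources import Cluster

    cluster = Cluster.get(cluster_name="demo-hyperpod")        # assumed existing getter
    node = cluster.get_node(node_logical_id="i-logical-0001")  # placeholder logical ID
    print(node)  # shapes.ClusterNodeDetails
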
@@ -3785,6 +3793,7 @@ class Cluster(Base):
  instance_group_name_contains: Optional[str] = Unassigned(),
  sort_by: Optional[str] = Unassigned(),
  sort_order: Optional[str] = Unassigned(),
+ include_node_logical_ids: Optional[bool] = Unassigned(),
  session: Optional[Session] = None,
  region: Optional[str] = None,
  ) -> ResourceIterator[shapes.ClusterNodeDetails]:
@@ -3799,6 +3808,7 @@ class Cluster(Base):
  next_token: If the result of the previous ListClusterNodes request was truncated, the response includes a NextToken. To retrieve the next set of cluster nodes, use the token in the next request.
  sort_by: The field by which to sort results. The default value is CREATION_TIME.
  sort_order: The sort order for results. The default value is Ascending.
+ include_node_logical_ids: Specifies whether to include nodes that are still being provisioned in the response. When set to true, the response includes all nodes regardless of their provisioning status. When set to false (the default), only nodes with assigned InstanceIds are returned.
  session: Boto3 session.
  region: Region name.

@@ -3825,6 +3835,7 @@ class Cluster(Base):
  "InstanceGroupNameContains": instance_group_name_contains,
  "SortBy": sort_by,
  "SortOrder": sort_order,
+ "IncludeNodeLogicalIds": include_node_logical_ids,
  }
  # serialize the input request
  operation_input_args = serialize(operation_input_args)
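
The same flag flows through the ListClusterNodes wrapper. A sketch, assuming that wrapper is named get_all_nodes (the method name is not visible in these hunks) and that ClusterNodeDetails exposes snake_cased fields from the API model:

    from sagemaker_core.main.resources import Cluster

    cluster = Cluster.get(cluster_name="demo-hyperpod")  # assumed existing getter
    # Include nodes still provisioning, i.e. without an InstanceId yet.
    for node in cluster.get_all_nodes(include_node_logical_ids=True):
        print(node.instance_id, node.instance_status)    # attribute names assumed
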
@@ -3847,6 +3858,7 @@ class Cluster(Base):
  def update_software(
  self,
  deployment_config: Optional[shapes.DeploymentConfiguration] = Unassigned(),
+ image_id: Optional[str] = Unassigned(),
  session: Optional[Session] = None,
  region: Optional[str] = None,
  ) -> None:
@@ -3855,6 +3867,7 @@ class Cluster(Base):

  Parameters:
  deployment_config: The configuration to use when updating the AMI versions.
+ image_id: When configuring your HyperPod cluster, you can specify an image ID using one of the following options: HyperPodPublicAmiId: Use a HyperPod public AMI. CustomAmiId: Use your custom AMI. default: Use the default latest system image. If you choose to use a custom AMI (CustomAmiId), ensure it meets the following requirements: Encryption: The custom AMI must be unencrypted. Ownership: The custom AMI must be owned by the same Amazon Web Services account that is creating the HyperPod cluster. Volume support: Only the primary AMI snapshot volume is supported; additional AMI volumes are not supported. When updating the instance group's AMI through the UpdateClusterSoftware operation, if an instance group uses a custom AMI, you must provide an ImageId or use the default as input.
  session: Boto3 session.
  region: Region name.

@@ -3876,6 +3889,7 @@ class Cluster(Base):
  "ClusterName": self.cluster_name,
  "InstanceGroups": self.instance_groups,
  "DeploymentConfig": deployment_config,
+ "ImageId": image_id,
  }
  # serialize the input request
  operation_input_args = serialize(operation_input_args)
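
A sketch of the new ImageId pass-through to UpdateClusterSoftware, using the "default" sentinel the docstring describes; the cluster handle and getter are assumed as before, and deployment_config is left at its default.

    from sagemaker_core.main.resources import Cluster

    cluster = Cluster.get(cluster_name="demo-hyperpod")  # assumed existing getter
    # "default" rolls instance groups onto the latest system image;
    # a HyperPodPublicAmiId or CustomAmiId string could be passed instead.
    cluster.update_software(image_id="default")
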
@@ -3893,6 +3907,7 @@ class Cluster(Base):
  def batch_delete_nodes(
  self,
  node_ids: Optional[List[str]] = Unassigned(),
+ node_logical_ids: Optional[List[str]] = Unassigned(),
  session: Optional[Session] = None,
  region: Optional[str] = None,
  ) -> Optional[shapes.BatchDeleteClusterNodesResponse]:
@@ -3901,6 +3916,7 @@ class Cluster(Base):

  Parameters:
  node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes. If you need to delete more than 99 instances, contact Support for assistance.
+ node_logical_ids: A list of NodeLogicalIds identifying the nodes to be deleted. You can specify up to 50 NodeLogicalIds. You must specify either NodeLogicalIds, InstanceIds, or both, with a combined maximum of 50 identifiers.
  session: Boto3 session.
  region: Region name.

@@ -3923,6 +3939,7 @@ class Cluster(Base):
  operation_input_args = {
  "ClusterName": self.cluster_name,
  "NodeIds": node_ids,
+ "NodeLogicalIds": node_logical_ids,
  }
  # serialize the input request
  operation_input_args = serialize(operation_input_args)
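
A matching sketch for deletion by logical ID; per the docstring above, NodeIds and NodeLogicalIds may be combined up to 50 identifiers total. IDs here are placeholders.

    from sagemaker_core.main.resources import Cluster

    cluster = Cluster.get(cluster_name="demo-hyperpod")  # assumed existing getter
    response = cluster.batch_delete_nodes(
        node_logical_ids=["i-logical-0001", "i-logical-0002"]  # placeholder IDs
    )
    print(response)  # shapes.BatchDeleteClusterNodesResponse
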
@@ -28874,6 +28891,9 @@ class TrainingPlan(Base):
  total_instance_count: The total number of instances reserved in this training plan.
  available_instance_count: The number of instances currently available for use in this training plan.
  in_use_instance_count: The number of instances currently in use from this training plan.
+ unhealthy_instance_count: The number of instances in the training plan that are currently in an unhealthy state.
+ available_spare_instance_count: The number of available spare instances in the training plan.
+ total_ultra_server_count: The total number of UltraServers reserved for this training plan.
  target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group.
  reserved_capacity_summaries: The list of Reserved Capacity providing the underlying compute resources of the plan.

@@ -28892,6 +28912,9 @@ class TrainingPlan(Base):
  total_instance_count: Optional[int] = Unassigned()
  available_instance_count: Optional[int] = Unassigned()
  in_use_instance_count: Optional[int] = Unassigned()
+ unhealthy_instance_count: Optional[int] = Unassigned()
+ available_spare_instance_count: Optional[int] = Unassigned()
+ total_ultra_server_count: Optional[int] = Unassigned()
  target_resources: Optional[List[str]] = Unassigned()
  reserved_capacity_summaries: Optional[List[shapes.ReservedCapacitySummary]] = Unassigned()

@@ -28917,6 +28940,7 @@ class TrainingPlan(Base):
  cls,
  training_plan_name: str,
  training_plan_offering_id: str,
+ spare_instance_count_per_ultra_server: Optional[int] = Unassigned(),
  tags: Optional[List[shapes.Tag]] = Unassigned(),
  session: Optional[Session] = None,
  region: Optional[str] = None,
@@ -28927,6 +28951,7 @@ class TrainingPlan(Base):
  Parameters:
  training_plan_name: The name of the training plan to create.
  training_plan_offering_id: The unique identifier of the training plan offering to use for creating this plan.
+ spare_instance_count_per_ultra_server: Number of spare instances to reserve per UltraServer for enhanced resiliency. Default is 1.
  tags: An array of key-value pairs to apply to this training plan.
  session: Boto3 session.
  region: Region name.
@@ -28960,6 +28985,7 @@ class TrainingPlan(Base):
  operation_input_args = {
  "TrainingPlanName": training_plan_name,
  "TrainingPlanOfferingId": training_plan_offering_id,
+ "SpareInstanceCountPerUltraServer": spare_instance_count_per_ultra_server,
  "Tags": tags,
  }
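
Finally, a sketch of the new reservation parameter together with the new read-only counters, assuming the same import conventions. The plan name and offering ID are placeholders; an offering ID would normally come from searching training plan offerings first.

    from sagemaker_core.main.resources import TrainingPlan

    plan = TrainingPlan.create(
        training_plan_name="demo-plan",                # placeholder
        training_plan_offering_id="tpo-0123456789ab",  # placeholder offering ID
        spare_instance_count_per_ultra_server=1,       # new in 1.0.50; default is 1
    )
    plan.refresh()  # assumed helper to pull described state
    print(plan.total_ultra_server_count, plan.available_spare_instance_count)
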