RubyGems - aws-sdk-sagemaker - Versions diffs - 1.339.0 → 1.340.0 - Mend

aws-sdk-sagemaker 1.339.0 → 1.340.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/VERSION +1 -1
data/lib/aws-sdk-sagemaker/client.rb +226 -6
data/lib/aws-sdk-sagemaker/client_api.rb +103 -0
data/lib/aws-sdk-sagemaker/types.rb +403 -7
data/lib/aws-sdk-sagemaker.rb +1 -1
data/sig/client.rbs +42 -4
data/sig/types.rbs +70 -4
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f99f676fe2b17eb076e8761bee93ce0bfaee796c41dc984e1eab0803c1947f2e
-  data.tar.gz: 6b3644ed5504af87e03d6b11d457c9cfa1bcf428b917150a2bad9a661968f654
+  metadata.gz: f760b6f6d6902ca08349489fe9e34c577365953a32aa2f031a4db9e67aa9cb08
+  data.tar.gz: 5d01231c9d2c1d978d56deee398fb98fe79da54085d362172671724d6bce3233
 SHA512:
-  metadata.gz: c26fc869f314ef2b25b1b2e0c58c9926c7b20b740a5f46064e31618cd4af601938f5df74199fce53c2c2d5494059acbec30b71727ae6534400013bbee5516433
-  data.tar.gz: 2bbffdc253a295bf9b9aa9fb1ae956ba268bb2d6d27abce4574ad0a4e0d1861c6ce0d84f655bbb45ed6959be27b34820b399c908c96e87de3b5d4c7dff93ed6b
+  metadata.gz: a658467635f982e5e53c0cd5a428c96d8f17acb120d16c31184b8ac8ddd2f21c9062c41e6df84610f95227118ae6de6a47053642188e8ed29208e2d44cfa5863
+  data.tar.gz: ec789fc4a9c2d3014b4e19c7a5b628f158fdc8a3b262139e86e44147b83a65107844ac19445d924aa41f1e90791d961c6c1bf8abb2ef19a9de49e6a54e63cba0

data/CHANGELOG.md CHANGED Viewed

@@ -1,6 +1,11 @@
 Unreleased Changes
 ------------------
+1.340.0 (2025-11-20)
+------------------
+* Feature - Added training plan support for inference endpoints. Added HyperPod task governance with accelerator partition-based quota allocation. Added BatchRebootClusterNodes and BatchReplaceClusterNodes APIs. Updated ListClusterNodes to include privateDnsHostName.
 1.339.0 (2025-11-19)
 ------------------

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 1.339.0
1	+ 1.340.0

data/lib/aws-sdk-sagemaker/client.rb CHANGED Viewed

@@ -950,6 +950,209 @@ module Aws::SageMaker
       req.send_request(options)
     end
+    # Reboots specific nodes within a SageMaker HyperPod cluster using a
+    # soft recovery mechanism. `BatchRebootClusterNodes` performs a graceful
+    # reboot of the specified nodes by calling the Amazon Elastic Compute
+    # Cloud `RebootInstances` API, which attempts to cleanly shut down the
+    # operating system before restarting the instance.
+    #
+    # This operation is useful for recovering from transient issues or
+    # applying certain configuration changes that require a restart.
+    #
+    # <note markdown="1"> * Rebooting a node may cause temporary service interruption for
+    #   workloads running on that node. Ensure your workloads can handle
+    #   node restarts or use appropriate scheduling to minimize impact.
+    #
+    # * You can reboot up to 25 nodes in a single request.
+    #
+    # * For SageMaker HyperPod clusters using the Slurm workload manager,
+    #   ensure rebooting nodes will not disrupt critical cluster operations.
+    #
+    #  </note>
+    #
+    # @option params [required, String] :cluster_name
+    #   The name or Amazon Resource Name (ARN) of the SageMaker HyperPod
+    #   cluster containing the nodes to reboot.
+    #
+    # @option params [Array<String>] :node_ids
+    #   A list of EC2 instance IDs to reboot using soft recovery. You can
+    #   specify between 1 and 25 instance IDs.
+    #
+    #   <note markdown="1"> * Either `NodeIds` or `NodeLogicalIds` must be provided (or both), but
+    #     at least one is required.
+    #
+    #   * Each instance ID must follow the pattern `i-` followed by 17
+    #     hexadecimal characters (for example, `i-0123456789abcdef0`).
+    #
+    #    </note>
+    #
+    # @option params [Array<String>] :node_logical_ids
+    #   A list of logical node IDs to reboot using soft recovery. You can
+    #   specify between 1 and 25 logical node IDs.
+    #
+    #   The `NodeLogicalId` is a unique identifier that persists throughout
+    #   the node's lifecycle and can be used to track nodes that are still
+    #   being provisioned and don't yet have an EC2 instance ID assigned.
+    #
+    #   * This parameter is only supported for clusters using `Continuous` as
+    #     the `NodeProvisioningMode`. For clusters using the default
+    #     provisioning mode, use `NodeIds` instead.
+    #
+    #   * Either `NodeIds` or `NodeLogicalIds` must be provided (or both), but
+    #     at least one is required.
+    #
+    # @return [Types::BatchRebootClusterNodesResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
+    #
+    #   * {Types::BatchRebootClusterNodesResponse#successful #successful} => Array&lt;String&gt;
+    #   * {Types::BatchRebootClusterNodesResponse#failed #failed} => Array&lt;Types::BatchRebootClusterNodesError&gt;
+    #   * {Types::BatchRebootClusterNodesResponse#failed_node_logical_ids #failed_node_logical_ids} => Array&lt;Types::BatchRebootClusterNodeLogicalIdsError&gt;
+    #   * {Types::BatchRebootClusterNodesResponse#successful_node_logical_ids #successful_node_logical_ids} => Array&lt;String&gt;
+    #
+    # @example Request syntax with placeholder values
+    #
+    #   resp = client.batch_reboot_cluster_nodes({
+    #     cluster_name: "ClusterNameOrArn", # required
+    #     node_ids: ["ClusterNodeId"],
+    #     node_logical_ids: ["ClusterNodeLogicalId"],
+    #   })
+    #
+    # @example Response structure
+    #
+    #   resp.successful #=> Array
+    #   resp.successful[0] #=> String
+    #   resp.failed #=> Array
+    #   resp.failed[0].node_id #=> String
+    #   resp.failed[0].error_code #=> String, one of "InstanceIdNotFound", "InvalidInstanceStatus", "InstanceIdInUse", "InternalServerError"
+    #   resp.failed[0].message #=> String
+    #   resp.failed_node_logical_ids #=> Array
+    #   resp.failed_node_logical_ids[0].node_logical_id #=> String
+    #   resp.failed_node_logical_ids[0].error_code #=> String, one of "InstanceIdNotFound", "InvalidInstanceStatus", "InstanceIdInUse", "InternalServerError"
+    #   resp.failed_node_logical_ids[0].message #=> String
+    #   resp.successful_node_logical_ids #=> Array
+    #   resp.successful_node_logical_ids[0] #=> String
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchRebootClusterNodes AWS API Documentation
+    #
+    # @overload batch_reboot_cluster_nodes(params = {})
+    # @param [Hash] params ({})
+    def batch_reboot_cluster_nodes(params = {}, options = {})
+      req = build_request(:batch_reboot_cluster_nodes, params)
+      req.send_request(options)
+    end
+    # Replaces specific nodes within a SageMaker HyperPod cluster with new
+    # hardware. `BatchReplaceClusterNodes` terminates the specified
+    # instances and provisions new replacement instances with the same
+    # configuration but fresh hardware. The Amazon Machine Image (AMI) and
+    # instance configuration remain the same.
+    #
+    # This operation is useful for recovering from hardware failures or
+    # persistent issues that cannot be resolved through a reboot.
+    #
+    # * **Data Loss Warning:** Replacing nodes destroys all instance
+    #   volumes, including both root and secondary volumes. All data stored
+    #   on these volumes will be permanently lost and cannot be recovered.
+    #
+    # * To safeguard your work, back up your data to Amazon S3 or an FSx for
+    #   Lustre file system before invoking the API on a worker node group.
+    #   This will help prevent any potential data loss from the instance
+    #   root volume. For more information about backup, see [Use the backup
+    #   script provided by SageMaker HyperPod][1].
+    #
+    # * If you want to invoke this API on an existing cluster, you'll first
+    #   need to patch the cluster by running the [UpdateClusterSoftware
+    #   API][2]. For more information about patching a cluster, see [Update
+    #   the SageMaker HyperPod platform software of a cluster][3].
+    #
+    # * You can replace up to 25 nodes in a single request.
+    #
+    #
+    #
+    # [1]: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-hyperpod-operate-cli-command.html#sagemaker-hyperpod-operate-cli-command-update-cluster-software-backup
+    # [2]: https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_UpdateClusterSoftware.html
+    # [3]: https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-hyperpod-operate-cli-command.html#sagemaker-hyperpod-operate-cli-command-update-cluster-software
+    #
+    # @option params [required, String] :cluster_name
+    #   The name or Amazon Resource Name (ARN) of the SageMaker HyperPod
+    #   cluster containing the nodes to replace.
+    #
+    # @option params [Array<String>] :node_ids
+    #   A list of EC2 instance IDs to replace with new hardware. You can
+    #   specify between 1 and 25 instance IDs.
+    #
+    #   Replace operations destroy all instance volumes (root and secondary).
+    #   Ensure you have backed up any important data before proceeding.
+    #
+    #   <note markdown="1"> * Either `NodeIds` or `NodeLogicalIds` must be provided (or both), but
+    #     at least one is required.
+    #
+    #   * Each instance ID must follow the pattern `i-` followed by 17
+    #     hexadecimal characters (for example, `i-0123456789abcdef0`).
+    #
+    #   * For SageMaker HyperPod clusters using the Slurm workload manager,
+    #     you cannot replace instances that are configured as Slurm controller
+    #     nodes.
+    #
+    #    </note>
+    #
+    # @option params [Array<String>] :node_logical_ids
+    #   A list of logical node IDs to replace with new hardware. You can
+    #   specify between 1 and 25 logical node IDs.
+    #
+    #   The `NodeLogicalId` is a unique identifier that persists throughout
+    #   the node's lifecycle and can be used to track nodes that are still
+    #   being provisioned and don't yet have an EC2 instance ID assigned.
+    #
+    #   * Replace operations destroy all instance volumes (root and
+    #     secondary). Ensure you have backed up any important data before
+    #     proceeding.
+    #
+    #   * This parameter is only supported for clusters using `Continuous` as
+    #     the `NodeProvisioningMode`. For clusters using the default
+    #     provisioning mode, use `NodeIds` instead.
+    #
+    #   * Either `NodeIds` or `NodeLogicalIds` must be provided (or both), but
+    #     at least one is required.
+    #
+    # @return [Types::BatchReplaceClusterNodesResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
+    #
+    #   * {Types::BatchReplaceClusterNodesResponse#successful #successful} => Array&lt;String&gt;
+    #   * {Types::BatchReplaceClusterNodesResponse#failed #failed} => Array&lt;Types::BatchReplaceClusterNodesError&gt;
+    #   * {Types::BatchReplaceClusterNodesResponse#failed_node_logical_ids #failed_node_logical_ids} => Array&lt;Types::BatchReplaceClusterNodeLogicalIdsError&gt;
+    #   * {Types::BatchReplaceClusterNodesResponse#successful_node_logical_ids #successful_node_logical_ids} => Array&lt;String&gt;
+    #
+    # @example Request syntax with placeholder values
+    #
+    #   resp = client.batch_replace_cluster_nodes({
+    #     cluster_name: "ClusterNameOrArn", # required
+    #     node_ids: ["ClusterNodeId"],
+    #     node_logical_ids: ["ClusterNodeLogicalId"],
+    #   })
+    #
+    # @example Response structure
+    #
+    #   resp.successful #=> Array
+    #   resp.successful[0] #=> String
+    #   resp.failed #=> Array
+    #   resp.failed[0].node_id #=> String
+    #   resp.failed[0].error_code #=> String, one of "InstanceIdNotFound", "InvalidInstanceStatus", "InstanceIdInUse", "InternalServerError"
+    #   resp.failed[0].message #=> String
+    #   resp.failed_node_logical_ids #=> Array
+    #   resp.failed_node_logical_ids[0].node_logical_id #=> String
+    #   resp.failed_node_logical_ids[0].error_code #=> String, one of "InstanceIdNotFound", "InvalidInstanceStatus", "InstanceIdInUse", "InternalServerError"
+    #   resp.failed_node_logical_ids[0].message #=> String
+    #   resp.successful_node_logical_ids #=> Array
+    #   resp.successful_node_logical_ids[0] #=> String
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchReplaceClusterNodes AWS API Documentation
+    #
+    # @overload batch_replace_cluster_nodes(params = {})
+    # @param [Hash] params ({})
+    def batch_replace_cluster_nodes(params = {}, options = {})
+      req = build_request(:batch_replace_cluster_nodes, params)
+      req.send_request(options)
+    end
     # Creates an *action*. An action is a lineage tracking entity that
     # represents an action or activity. For example, a model deployment or
     # an HPO job. Generally, an action involves at least one input or output
@@ -2725,6 +2928,10 @@ module Aws::SageMaker
     #           accelerators: 1,
     #           v_cpu: 1.0,
     #           memory_in_gi_b: 1.0,
+    #           accelerator_partition: {
+    #             type: "mig-1g.5gb", # required, accepts mig-1g.5gb, mig-1g.10gb, mig-1g.18gb, mig-1g.20gb, mig-1g.23gb, mig-1g.35gb, mig-1g.45gb, mig-1g.47gb, mig-2g.10gb, mig-2g.20gb, mig-2g.35gb, mig-2g.45gb, mig-2g.47gb, mig-3g.20gb, mig-3g.40gb, mig-3g.71gb, mig-3g.90gb, mig-3g.93gb, mig-4g.20gb, mig-4g.40gb, mig-4g.71gb, mig-4g.90gb, mig-4g.93gb, mig-7g.40gb, mig-7g.80gb, mig-7g.141gb, mig-7g.180gb, mig-7g.186gb
+    #             count: 1, # required
+    #           },
     #         },
     #       ],
     #       resource_sharing_config: {
@@ -13912,6 +14119,8 @@ module Aws::SageMaker
     #   resp.compute_quota_config.compute_quota_resources[0].accelerators #=> Integer
     #   resp.compute_quota_config.compute_quota_resources[0].v_cpu #=> Float
     #   resp.compute_quota_config.compute_quota_resources[0].memory_in_gi_b #=> Float
+    #   resp.compute_quota_config.compute_quota_resources[0].accelerator_partition.type #=> String, one of "mig-1g.5gb", "mig-1g.10gb", "mig-1g.18gb", "mig-1g.20gb", "mig-1g.23gb", "mig-1g.35gb", "mig-1g.45gb", "mig-1g.47gb", "mig-2g.10gb", "mig-2g.20gb", "mig-2g.35gb", "mig-2g.45gb", "mig-2g.47gb", "mig-3g.20gb", "mig-3g.40gb", "mig-3g.71gb", "mig-3g.90gb", "mig-3g.93gb", "mig-4g.20gb", "mig-4g.40gb", "mig-4g.71gb", "mig-4g.90gb", "mig-4g.93gb", "mig-7g.40gb", "mig-7g.80gb", "mig-7g.141gb", "mig-7g.180gb", "mig-7g.186gb"
+    #   resp.compute_quota_config.compute_quota_resources[0].accelerator_partition.count #=> Integer
     #   resp.compute_quota_config.resource_sharing_config.strategy #=> String, one of "Lend", "DontLend", "LendAndBorrow"
     #   resp.compute_quota_config.resource_sharing_config.borrow_limit #=> Integer
     #   resp.compute_quota_config.preempt_team_tasks #=> String, one of "Never", "LowerPriority"
@@ -18411,7 +18620,7 @@ module Aws::SageMaker
     #   resp.available_spare_instance_count #=> Integer
     #   resp.total_ultra_server_count #=> Integer
     #   resp.target_resources #=> Array
-    #   resp.target_resources[0] #=> String, one of "training-job", "hyperpod-cluster"
+    #   resp.target_resources[0] #=> String, one of "training-job", "hyperpod-cluster", "endpoint"
     #   resp.reserved_capacity_summaries #=> Array
     #   resp.reserved_capacity_summaries[0].reserved_capacity_arn #=> String
     #   resp.reserved_capacity_summaries[0].reserved_capacity_type #=> String, one of "UltraServer", "Instance"
@@ -20350,6 +20559,7 @@ module Aws::SageMaker
     #   resp.cluster_node_summaries[0].instance_status.status #=> String, one of "Running", "Failure", "Pending", "ShuttingDown", "SystemUpdating", "DeepHealthCheckInProgress", "NotFound"
     #   resp.cluster_node_summaries[0].instance_status.message #=> String
     #   resp.cluster_node_summaries[0].ultra_server_info.id #=> String
+    #   resp.cluster_node_summaries[0].private_dns_hostname #=> String
     #
     # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ListClusterNodes AWS API Documentation
     #
@@ -20813,6 +21023,8 @@ module Aws::SageMaker
     #   resp.compute_quota_summaries[0].compute_quota_config.compute_quota_resources[0].accelerators #=> Integer
     #   resp.compute_quota_summaries[0].compute_quota_config.compute_quota_resources[0].v_cpu #=> Float
     #   resp.compute_quota_summaries[0].compute_quota_config.compute_quota_resources[0].memory_in_gi_b #=> Float
+    #   resp.compute_quota_summaries[0].compute_quota_config.compute_quota_resources[0].accelerator_partition.type #=> String, one of "mig-1g.5gb", "mig-1g.10gb", "mig-1g.18gb", "mig-1g.20gb", "mig-1g.23gb", "mig-1g.35gb", "mig-1g.45gb", "mig-1g.47gb", "mig-2g.10gb", "mig-2g.20gb", "mig-2g.35gb", "mig-2g.45gb", "mig-2g.47gb", "mig-3g.20gb", "mig-3g.40gb", "mig-3g.71gb", "mig-3g.90gb", "mig-3g.93gb", "mig-4g.20gb", "mig-4g.40gb", "mig-4g.71gb", "mig-4g.90gb", "mig-4g.93gb", "mig-7g.40gb", "mig-7g.80gb", "mig-7g.141gb", "mig-7g.180gb", "mig-7g.186gb"
+    #   resp.compute_quota_summaries[0].compute_quota_config.compute_quota_resources[0].accelerator_partition.count #=> Integer
     #   resp.compute_quota_summaries[0].compute_quota_config.resource_sharing_config.strategy #=> String, one of "Lend", "DontLend", "LendAndBorrow"
     #   resp.compute_quota_summaries[0].compute_quota_config.resource_sharing_config.borrow_limit #=> Integer
     #   resp.compute_quota_summaries[0].compute_quota_config.preempt_team_tasks #=> String, one of "Never", "LowerPriority"
@@ -25460,7 +25672,7 @@ module Aws::SageMaker
     #   resp.training_plan_summaries[0].in_use_instance_count #=> Integer
     #   resp.training_plan_summaries[0].total_ultra_server_count #=> Integer
     #   resp.training_plan_summaries[0].target_resources #=> Array
-    #   resp.training_plan_summaries[0].target_resources[0] #=> String, one of "training-job", "hyperpod-cluster"
+    #   resp.training_plan_summaries[0].target_resources[0] #=> String, one of "training-job", "hyperpod-cluster", "endpoint"
     #   resp.training_plan_summaries[0].reserved_capacity_summaries #=> Array
     #   resp.training_plan_summaries[0].reserved_capacity_summaries[0].reserved_capacity_arn #=> String
     #   resp.training_plan_summaries[0].reserved_capacity_summaries[0].reserved_capacity_type #=> String, one of "UltraServer", "Instance"
@@ -26481,7 +26693,7 @@ module Aws::SageMaker
     #
     # @option params [required, Array<String>] :target_resources
     #   The target resources (e.g., SageMaker Training Jobs, SageMaker
-    #   HyperPod) to search for in the offerings.
+    #   HyperPod, SageMaker Endpoints) to search for in the offerings.
     #
     #   Training plans are specific to their target resource.
     #
@@ -26491,6 +26703,10 @@ module Aws::SageMaker
     #   * A training plan for HyperPod clusters can be used exclusively to
     #     provide compute resources to a cluster's instance group.
     #
+    #   * A training plan for SageMaker endpoints can be used exclusively to
+    #     provide compute resources to SageMaker endpoints for model
+    #     deployment.
+    #
     # @return [Types::SearchTrainingPlanOfferingsResponse] Returns a {Seahorse::Client::Response response} object which responds to the following methods:
     #
     #   * {Types::SearchTrainingPlanOfferingsResponse#training_plan_offerings #training_plan_offerings} => Array&lt;Types::TrainingPlanOffering&gt;
@@ -26505,7 +26721,7 @@ module Aws::SageMaker
     #     start_time_after: Time.now,
     #     end_time_before: Time.now,
     #     duration_hours: 1, # required
-    #     target_resources: ["training-job"], # required, accepts training-job, hyperpod-cluster
+    #     target_resources: ["training-job"], # required, accepts training-job, hyperpod-cluster, endpoint
     #   })
     #
     # @example Response structure
@@ -26513,7 +26729,7 @@ module Aws::SageMaker
     #   resp.training_plan_offerings #=> Array
     #   resp.training_plan_offerings[0].training_plan_offering_id #=> String
     #   resp.training_plan_offerings[0].target_resources #=> Array
-    #   resp.training_plan_offerings[0].target_resources[0] #=> String, one of "training-job", "hyperpod-cluster"
+    #   resp.training_plan_offerings[0].target_resources[0] #=> String, one of "training-job", "hyperpod-cluster", "endpoint"
     #   resp.training_plan_offerings[0].requested_start_time_after #=> Time
     #   resp.training_plan_offerings[0].requested_end_time_before #=> Time
     #   resp.training_plan_offerings[0].duration_hours #=> Integer
@@ -27951,6 +28167,10 @@ module Aws::SageMaker
     #           accelerators: 1,
     #           v_cpu: 1.0,
     #           memory_in_gi_b: 1.0,
+    #           accelerator_partition: {
+    #             type: "mig-1g.5gb", # required, accepts mig-1g.5gb, mig-1g.10gb, mig-1g.18gb, mig-1g.20gb, mig-1g.23gb, mig-1g.35gb, mig-1g.45gb, mig-1g.47gb, mig-2g.10gb, mig-2g.20gb, mig-2g.35gb, mig-2g.45gb, mig-2g.47gb, mig-3g.20gb, mig-3g.40gb, mig-3g.71gb, mig-3g.90gb, mig-3g.93gb, mig-4g.20gb, mig-4g.40gb, mig-4g.71gb, mig-4g.90gb, mig-4g.93gb, mig-7g.40gb, mig-7g.80gb, mig-7g.141gb, mig-7g.180gb, mig-7g.186gb
+    #             count: 1, # required
+    #           },
     #         },
     #       ],
     #       resource_sharing_config: {
@@ -31359,7 +31579,7 @@ module Aws::SageMaker
         tracer: tracer
       )
       context[:gem_name] = 'aws-sdk-sagemaker'
-      context[:gem_version] = '1.339.0'
+      context[:gem_version] = '1.340.0'
       Seahorse::Client::Request.new(handlers, context)
     end

data/lib/aws-sdk-sagemaker/client_api.rb CHANGED Viewed

@@ -14,6 +14,8 @@ module Aws::SageMaker
     include Seahorse::Model
+    AcceleratorPartitionConfig = Shapes::StructureShape.new(name: 'AcceleratorPartitionConfig')
+    AcceleratorPartitionConfigCountInteger = Shapes::IntegerShape.new(name: 'AcceleratorPartitionConfigCountInteger')
     AcceleratorsAmount = Shapes::IntegerShape.new(name: 'AcceleratorsAmount')
     Accept = Shapes::StringShape.new(name: 'Accept')
     AcceptEula = Shapes::BooleanShape.new(name: 'AcceptEula')
@@ -211,6 +213,24 @@ module Aws::SageMaker
     BatchDescribeModelPackageInput = Shapes::StructureShape.new(name: 'BatchDescribeModelPackageInput')
     BatchDescribeModelPackageOutput = Shapes::StructureShape.new(name: 'BatchDescribeModelPackageOutput')
     BatchDescribeModelPackageSummary = Shapes::StructureShape.new(name: 'BatchDescribeModelPackageSummary')
+    BatchRebootClusterNodeLogicalIdsError = Shapes::StructureShape.new(name: 'BatchRebootClusterNodeLogicalIdsError')
+    BatchRebootClusterNodeLogicalIdsErrors = Shapes::ListShape.new(name: 'BatchRebootClusterNodeLogicalIdsErrors')
+    BatchRebootClusterNodesError = Shapes::StructureShape.new(name: 'BatchRebootClusterNodesError')
+    BatchRebootClusterNodesErrorCode = Shapes::StringShape.new(name: 'BatchRebootClusterNodesErrorCode')
+    BatchRebootClusterNodesErrors = Shapes::ListShape.new(name: 'BatchRebootClusterNodesErrors')
+    BatchRebootClusterNodesRequest = Shapes::StructureShape.new(name: 'BatchRebootClusterNodesRequest')
+    BatchRebootClusterNodesRequestNodeIdsList = Shapes::ListShape.new(name: 'BatchRebootClusterNodesRequestNodeIdsList')
+    BatchRebootClusterNodesRequestNodeLogicalIdsList = Shapes::ListShape.new(name: 'BatchRebootClusterNodesRequestNodeLogicalIdsList')
+    BatchRebootClusterNodesResponse = Shapes::StructureShape.new(name: 'BatchRebootClusterNodesResponse')
+    BatchReplaceClusterNodeLogicalIdsError = Shapes::StructureShape.new(name: 'BatchReplaceClusterNodeLogicalIdsError')
+    BatchReplaceClusterNodeLogicalIdsErrors = Shapes::ListShape.new(name: 'BatchReplaceClusterNodeLogicalIdsErrors')
+    BatchReplaceClusterNodesError = Shapes::StructureShape.new(name: 'BatchReplaceClusterNodesError')
+    BatchReplaceClusterNodesErrorCode = Shapes::StringShape.new(name: 'BatchReplaceClusterNodesErrorCode')
+    BatchReplaceClusterNodesErrors = Shapes::ListShape.new(name: 'BatchReplaceClusterNodesErrors')
+    BatchReplaceClusterNodesRequest = Shapes::StructureShape.new(name: 'BatchReplaceClusterNodesRequest')
+    BatchReplaceClusterNodesRequestNodeIdsList = Shapes::ListShape.new(name: 'BatchReplaceClusterNodesRequestNodeIdsList')
+    BatchReplaceClusterNodesRequestNodeLogicalIdsList = Shapes::ListShape.new(name: 'BatchReplaceClusterNodesRequestNodeLogicalIdsList')
+    BatchReplaceClusterNodesResponse = Shapes::StructureShape.new(name: 'BatchReplaceClusterNodesResponse')
     BatchStrategy = Shapes::StringShape.new(name: 'BatchStrategy')
     BatchTransformInput = Shapes::StructureShape.new(name: 'BatchTransformInput')
     BestObjectiveNotImproving = Shapes::StructureShape.new(name: 'BestObjectiveNotImproving')
@@ -1569,6 +1589,7 @@ module Aws::SageMaker
     LocalPath = Shapes::StringShape.new(name: 'LocalPath')
     Long = Shapes::IntegerShape.new(name: 'Long')
     LongS3Uri = Shapes::StringShape.new(name: 'LongS3Uri')
+    MIGProfileType = Shapes::StringShape.new(name: 'MIGProfileType')
     MLFramework = Shapes::StringShape.new(name: 'MLFramework')
     MajorMinorVersion = Shapes::StringShape.new(name: 'MajorMinorVersion')
     ManagedInstanceScalingMaxInstanceCount = Shapes::IntegerShape.new(name: 'ManagedInstanceScalingMaxInstanceCount')
@@ -2708,6 +2729,10 @@ module Aws::SageMaker
     WorkteamName = Shapes::StringShape.new(name: 'WorkteamName')
     Workteams = Shapes::ListShape.new(name: 'Workteams')
+    AcceleratorPartitionConfig.add_member(:type, Shapes::ShapeRef.new(shape: MIGProfileType, required: true, location_name: "Type"))
+    AcceleratorPartitionConfig.add_member(:count, Shapes::ShapeRef.new(shape: AcceleratorPartitionConfigCountInteger, required: true, location_name: "Count", metadata: {"box" => true}))
+    AcceleratorPartitionConfig.struct_class = Types::AcceleratorPartitionConfig
     ActionSource.add_member(:source_uri, Shapes::ShapeRef.new(shape: SourceUri, required: true, location_name: "SourceUri"))
     ActionSource.add_member(:source_type, Shapes::ShapeRef.new(shape: String256, location_name: "SourceType"))
     ActionSource.add_member(:source_id, Shapes::ShapeRef.new(shape: String256, location_name: "SourceId"))
@@ -3210,6 +3235,64 @@ module Aws::SageMaker
     BatchDescribeModelPackageSummary.add_member(:model_approval_status, Shapes::ShapeRef.new(shape: ModelApprovalStatus, location_name: "ModelApprovalStatus"))
     BatchDescribeModelPackageSummary.struct_class = Types::BatchDescribeModelPackageSummary
+    BatchRebootClusterNodeLogicalIdsError.add_member(:node_logical_id, Shapes::ShapeRef.new(shape: ClusterNodeLogicalId, required: true, location_name: "NodeLogicalId"))
+    BatchRebootClusterNodeLogicalIdsError.add_member(:error_code, Shapes::ShapeRef.new(shape: BatchRebootClusterNodesErrorCode, required: true, location_name: "ErrorCode"))
+    BatchRebootClusterNodeLogicalIdsError.add_member(:message, Shapes::ShapeRef.new(shape: String, required: true, location_name: "Message"))
+    BatchRebootClusterNodeLogicalIdsError.struct_class = Types::BatchRebootClusterNodeLogicalIdsError
+    BatchRebootClusterNodeLogicalIdsErrors.member = Shapes::ShapeRef.new(shape: BatchRebootClusterNodeLogicalIdsError)
+    BatchRebootClusterNodesError.add_member(:node_id, Shapes::ShapeRef.new(shape: ClusterNodeId, required: true, location_name: "NodeId"))
+    BatchRebootClusterNodesError.add_member(:error_code, Shapes::ShapeRef.new(shape: BatchRebootClusterNodesErrorCode, required: true, location_name: "ErrorCode"))
+    BatchRebootClusterNodesError.add_member(:message, Shapes::ShapeRef.new(shape: String, required: true, location_name: "Message"))
+    BatchRebootClusterNodesError.struct_class = Types::BatchRebootClusterNodesError
+    BatchRebootClusterNodesErrors.member = Shapes::ShapeRef.new(shape: BatchRebootClusterNodesError)
+    BatchRebootClusterNodesRequest.add_member(:cluster_name, Shapes::ShapeRef.new(shape: ClusterNameOrArn, required: true, location_name: "ClusterName"))
+    BatchRebootClusterNodesRequest.add_member(:node_ids, Shapes::ShapeRef.new(shape: BatchRebootClusterNodesRequestNodeIdsList, location_name: "NodeIds"))
+    BatchRebootClusterNodesRequest.add_member(:node_logical_ids, Shapes::ShapeRef.new(shape: BatchRebootClusterNodesRequestNodeLogicalIdsList, location_name: "NodeLogicalIds"))
+    BatchRebootClusterNodesRequest.struct_class = Types::BatchRebootClusterNodesRequest
+    BatchRebootClusterNodesRequestNodeIdsList.member = Shapes::ShapeRef.new(shape: ClusterNodeId)
+    BatchRebootClusterNodesRequestNodeLogicalIdsList.member = Shapes::ShapeRef.new(shape: ClusterNodeLogicalId)
+    BatchRebootClusterNodesResponse.add_member(:successful, Shapes::ShapeRef.new(shape: ClusterNodeIds, location_name: "Successful"))
+    BatchRebootClusterNodesResponse.add_member(:failed, Shapes::ShapeRef.new(shape: BatchRebootClusterNodesErrors, location_name: "Failed"))
+    BatchRebootClusterNodesResponse.add_member(:failed_node_logical_ids, Shapes::ShapeRef.new(shape: BatchRebootClusterNodeLogicalIdsErrors, location_name: "FailedNodeLogicalIds"))
+    BatchRebootClusterNodesResponse.add_member(:successful_node_logical_ids, Shapes::ShapeRef.new(shape: ClusterNodeLogicalIdList, location_name: "SuccessfulNodeLogicalIds"))
+    BatchRebootClusterNodesResponse.struct_class = Types::BatchRebootClusterNodesResponse
+    BatchReplaceClusterNodeLogicalIdsError.add_member(:node_logical_id, Shapes::ShapeRef.new(shape: ClusterNodeLogicalId, required: true, location_name: "NodeLogicalId"))
+    BatchReplaceClusterNodeLogicalIdsError.add_member(:error_code, Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesErrorCode, required: true, location_name: "ErrorCode"))
+    BatchReplaceClusterNodeLogicalIdsError.add_member(:message, Shapes::ShapeRef.new(shape: String, required: true, location_name: "Message"))
+    BatchReplaceClusterNodeLogicalIdsError.struct_class = Types::BatchReplaceClusterNodeLogicalIdsError
+    BatchReplaceClusterNodeLogicalIdsErrors.member = Shapes::ShapeRef.new(shape: BatchReplaceClusterNodeLogicalIdsError)
+    BatchReplaceClusterNodesError.add_member(:node_id, Shapes::ShapeRef.new(shape: ClusterNodeId, required: true, location_name: "NodeId"))
+    BatchReplaceClusterNodesError.add_member(:error_code, Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesErrorCode, required: true, location_name: "ErrorCode"))
+    BatchReplaceClusterNodesError.add_member(:message, Shapes::ShapeRef.new(shape: String, required: true, location_name: "Message"))
+    BatchReplaceClusterNodesError.struct_class = Types::BatchReplaceClusterNodesError
+    BatchReplaceClusterNodesErrors.member = Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesError)
+    BatchReplaceClusterNodesRequest.add_member(:cluster_name, Shapes::ShapeRef.new(shape: ClusterNameOrArn, required: true, location_name: "ClusterName"))
+    BatchReplaceClusterNodesRequest.add_member(:node_ids, Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesRequestNodeIdsList, location_name: "NodeIds"))
+    BatchReplaceClusterNodesRequest.add_member(:node_logical_ids, Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesRequestNodeLogicalIdsList, location_name: "NodeLogicalIds"))
+    BatchReplaceClusterNodesRequest.struct_class = Types::BatchReplaceClusterNodesRequest
+    BatchReplaceClusterNodesRequestNodeIdsList.member = Shapes::ShapeRef.new(shape: ClusterNodeId)
+    BatchReplaceClusterNodesRequestNodeLogicalIdsList.member = Shapes::ShapeRef.new(shape: ClusterNodeLogicalId)
+    BatchReplaceClusterNodesResponse.add_member(:successful, Shapes::ShapeRef.new(shape: ClusterNodeIds, location_name: "Successful"))
+    BatchReplaceClusterNodesResponse.add_member(:failed, Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesErrors, location_name: "Failed"))
+    BatchReplaceClusterNodesResponse.add_member(:failed_node_logical_ids, Shapes::ShapeRef.new(shape: BatchReplaceClusterNodeLogicalIdsErrors, location_name: "FailedNodeLogicalIds"))
+    BatchReplaceClusterNodesResponse.add_member(:successful_node_logical_ids, Shapes::ShapeRef.new(shape: ClusterNodeLogicalIdList, location_name: "SuccessfulNodeLogicalIds"))
+    BatchReplaceClusterNodesResponse.struct_class = Types::BatchReplaceClusterNodesResponse
     BatchTransformInput.add_member(:data_captured_destination_s3_uri, Shapes::ShapeRef.new(shape: DestinationS3Uri, required: true, location_name: "DataCapturedDestinationS3Uri"))
     BatchTransformInput.add_member(:dataset_format, Shapes::ShapeRef.new(shape: MonitoringDatasetFormat, required: true, location_name: "DatasetFormat"))
     BatchTransformInput.add_member(:local_path, Shapes::ShapeRef.new(shape: ProcessingLocalPath, required: true, location_name: "LocalPath"))
@@ -3561,6 +3644,7 @@ module Aws::SageMaker
     ClusterNodeSummary.add_member(:last_software_update_time, Shapes::ShapeRef.new(shape: Timestamp, location_name: "LastSoftwareUpdateTime"))
     ClusterNodeSummary.add_member(:instance_status, Shapes::ShapeRef.new(shape: ClusterInstanceStatusDetails, required: true, location_name: "InstanceStatus"))
     ClusterNodeSummary.add_member(:ultra_server_info, Shapes::ShapeRef.new(shape: UltraServerInfo, location_name: "UltraServerInfo"))
+    ClusterNodeSummary.add_member(:private_dns_hostname, Shapes::ShapeRef.new(shape: ClusterPrivateDnsHostname, location_name: "PrivateDnsHostname"))
     ClusterNodeSummary.struct_class = Types::ClusterNodeSummary
     ClusterOrchestrator.add_member(:eks, Shapes::ShapeRef.new(shape: ClusterOrchestratorEksConfig, required: true, location_name: "Eks"))
@@ -3703,6 +3787,7 @@ module Aws::SageMaker
     ComputeQuotaResourceConfig.add_member(:accelerators, Shapes::ShapeRef.new(shape: AcceleratorsAmount, location_name: "Accelerators"))
     ComputeQuotaResourceConfig.add_member(:v_cpu, Shapes::ShapeRef.new(shape: VCpuAmount, location_name: "VCpu"))
     ComputeQuotaResourceConfig.add_member(:memory_in_gi_b, Shapes::ShapeRef.new(shape: MemoryInGiBAmount, location_name: "MemoryInGiB"))
+    ComputeQuotaResourceConfig.add_member(:accelerator_partition, Shapes::ShapeRef.new(shape: AcceleratorPartitionConfig, location_name: "AcceleratorPartition"))
     ComputeQuotaResourceConfig.struct_class = Types::ComputeQuotaResourceConfig
     ComputeQuotaResourceConfigList.member = Shapes::ShapeRef.new(shape: ComputeQuotaResourceConfig)
@@ -12019,6 +12104,24 @@ module Aws::SageMaker
         o.output = Shapes::ShapeRef.new(shape: BatchDescribeModelPackageOutput)
       end)
+      api.add_operation(:batch_reboot_cluster_nodes, Seahorse::Model::Operation.new.tap do |o|
+        o.name = "BatchRebootClusterNodes"
+        o.http_method = "POST"
+        o.http_request_uri = "/"
+        o.input = Shapes::ShapeRef.new(shape: BatchRebootClusterNodesRequest)
+        o.output = Shapes::ShapeRef.new(shape: BatchRebootClusterNodesResponse)
+        o.errors << Shapes::ShapeRef.new(shape: ResourceNotFound)
+      end)
+      api.add_operation(:batch_replace_cluster_nodes, Seahorse::Model::Operation.new.tap do |o|
+        o.name = "BatchReplaceClusterNodes"
+        o.http_method = "POST"
+        o.http_request_uri = "/"
+        o.input = Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesRequest)
+        o.output = Shapes::ShapeRef.new(shape: BatchReplaceClusterNodesResponse)
+        o.errors << Shapes::ShapeRef.new(shape: ResourceNotFound)
+      end)
       api.add_operation(:create_action, Seahorse::Model::Operation.new.tap do |o|
         o.name = "CreateAction"
         o.http_method = "POST"

data/lib/aws-sdk-sagemaker/types.rb CHANGED Viewed

@@ -10,6 +10,33 @@
 module Aws::SageMaker
   module Types
+    # Configuration for allocating accelerator partitions.
+    #
+    # @!attribute [rw] type
+    #   The Multi-Instance GPU (MIG) profile type that defines the partition
+    #   configuration. The profile specifies the compute and memory
+    #   allocation for each partition instance. The available profile types
+    #   depend on the instance type specified in the compute quota
+    #   configuration.
+    #   @return [String]
+    #
+    # @!attribute [rw] count
+    #   The number of accelerator partitions to allocate with the specified
+    #   partition type. If you don't specify a value for vCPU and
+    #   MemoryInGiB, SageMaker AI automatically allocates ratio-based values
+    #   for those parameters based on the accelerator partition count you
+    #   provide.
+    #   @return [Integer]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/AcceleratorPartitionConfig AWS API Documentation
+    #
+    class AcceleratorPartitionConfig < Struct.new(
+      :type,
+      :count)
+      SENSITIVE = []
+      include Aws::Structure
+    end
     # A structure describing the source of an action.
     #
     # @!attribute [rw] source_uri
@@ -3452,6 +3479,348 @@ module Aws::SageMaker
       include Aws::Structure
     end
+    # Represents an error encountered when rebooting a node (identified by
+    # its logical node ID) from a SageMaker HyperPod cluster.
+    #
+    # @!attribute [rw] node_logical_id
+    #   The logical node ID of the node that encountered an error during the
+    #   reboot operation.
+    #   @return [String]
+    #
+    # @!attribute [rw] error_code
+    #   The error code associated with the error encountered when rebooting
+    #   a node by logical node ID.
+    #
+    #   Possible values:
+    #
+    #   * `InstanceIdNotFound`: The node does not exist in the specified
+    #     cluster.
+    #
+    #   * `InvalidInstanceStatus`: The node is in a state that does not
+    #     allow rebooting. Wait for the node to finish any ongoing changes
+    #     before retrying.
+    #
+    #   * `InstanceIdInUse`: Another operation is already in progress for
+    #     this node. Wait for the operation to complete before retrying.
+    #
+    #   * `InternalServerError`: An internal error occurred while processing
+    #     this node.
+    #   @return [String]
+    #
+    # @!attribute [rw] message
+    #   A human-readable message describing the error encountered when
+    #   rebooting a node by logical node ID.
+    #   @return [String]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchRebootClusterNodeLogicalIdsError AWS API Documentation
+    #
+    class BatchRebootClusterNodeLogicalIdsError < Struct.new(
+      :node_logical_id,
+      :error_code,
+      :message)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # Represents an error encountered when rebooting a node from a SageMaker
+    # HyperPod cluster.
+    #
+    # @!attribute [rw] node_id
+    #   The EC2 instance ID of the node that encountered an error during the
+    #   reboot operation.
+    #   @return [String]
+    #
+    # @!attribute [rw] error_code
+    #   The error code associated with the error encountered when rebooting
+    #   a node.
+    #
+    #   Possible values:
+    #
+    #   * `InstanceIdNotFound`: The instance does not exist in the specified
+    #     cluster.
+    #
+    #   * `InvalidInstanceStatus`: The instance is in a state that does not
+    #     allow rebooting. Wait for the instance to finish any ongoing
+    #     changes before retrying.
+    #
+    #   * `InstanceIdInUse`: Another operation is already in progress for
+    #     this node. Wait for the operation to complete before retrying.
+    #
+    #   * `InternalServerError`: An internal error occurred while processing
+    #     this node.
+    #   @return [String]
+    #
+    # @!attribute [rw] message
+    #   A human-readable message describing the error encountered when
+    #   rebooting a node.
+    #   @return [String]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchRebootClusterNodesError AWS API Documentation
+    #
+    class BatchRebootClusterNodesError < Struct.new(
+      :node_id,
+      :error_code,
+      :message)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # @!attribute [rw] cluster_name
+    #   The name or Amazon Resource Name (ARN) of the SageMaker HyperPod
+    #   cluster containing the nodes to reboot.
+    #   @return [String]
+    #
+    # @!attribute [rw] node_ids
+    #   A list of EC2 instance IDs to reboot using soft recovery. You can
+    #   specify between 1 and 25 instance IDs.
+    #
+    #   <note markdown="1"> * Either `NodeIds` or `NodeLogicalIds` must be provided (or both),
+    #     but at least one is required.
+    #
+    #   * Each instance ID must follow the pattern `i-` followed by 17
+    #     hexadecimal characters (for example, `i-0123456789abcdef0`).
+    #
+    #    </note>
+    #   @return [Array<String>]
+    #
+    # @!attribute [rw] node_logical_ids
+    #   A list of logical node IDs to reboot using soft recovery. You can
+    #   specify between 1 and 25 logical node IDs.
+    #
+    #   The `NodeLogicalId` is a unique identifier that persists throughout
+    #   the node's lifecycle and can be used to track nodes that are still
+    #   being provisioned and don't yet have an EC2 instance ID assigned.
+    #
+    #   * This parameter is only supported for clusters using `Continuous`
+    #     as the `NodeProvisioningMode`. For clusters using the default
+    #     provisioning mode, use `NodeIds` instead.
+    #
+    #   * Either `NodeIds` or `NodeLogicalIds` must be provided (or both),
+    #     but at least one is required.
+    #   @return [Array<String>]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchRebootClusterNodesRequest AWS API Documentation
+    #
+    class BatchRebootClusterNodesRequest < Struct.new(
+      :cluster_name,
+      :node_ids,
+      :node_logical_ids)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # @!attribute [rw] successful
+    #   A list of EC2 instance IDs for which the reboot operation was
+    #   successfully initiated.
+    #   @return [Array<String>]
+    #
+    # @!attribute [rw] failed
+    #   A list of errors encountered for EC2 instance IDs that could not be
+    #   rebooted. Each error includes the instance ID, an error code, and a
+    #   descriptive message.
+    #   @return [Array<Types::BatchRebootClusterNodesError>]
+    #
+    # @!attribute [rw] failed_node_logical_ids
+    #   A list of errors encountered for logical node IDs that could not be
+    #   rebooted. Each error includes the logical node ID, an error code,
+    #   and a descriptive message. This field is only present when
+    #   `NodeLogicalIds` were provided in the request.
+    #   @return [Array<Types::BatchRebootClusterNodeLogicalIdsError>]
+    #
+    # @!attribute [rw] successful_node_logical_ids
+    #   A list of logical node IDs for which the reboot operation was
+    #   successfully initiated. This field is only present when
+    #   `NodeLogicalIds` were provided in the request.
+    #   @return [Array<String>]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchRebootClusterNodesResponse AWS API Documentation
+    #
+    class BatchRebootClusterNodesResponse < Struct.new(
+      :successful,
+      :failed,
+      :failed_node_logical_ids,
+      :successful_node_logical_ids)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # Represents an error encountered when replacing a node (identified by
+    # its logical node ID) in a SageMaker HyperPod cluster.
+    #
+    # @!attribute [rw] node_logical_id
+    #   The logical node ID of the node that encountered an error during the
+    #   replacement operation.
+    #   @return [String]
+    #
+    # @!attribute [rw] error_code
+    #   The error code associated with the error encountered when replacing
+    #   a node by logical node ID.
+    #
+    #   Possible values:
+    #
+    #   * `InstanceIdNotFound`: The node does not exist in the specified
+    #     cluster.
+    #
+    #   * `InvalidInstanceStatus`: The node is in a state that does not
+    #     allow replacement. Wait for the node to finish any ongoing changes
+    #     before retrying.
+    #
+    #   * `InstanceIdInUse`: Another operation is already in progress for
+    #     this node. Wait for the operation to complete before retrying.
+    #
+    #   * `InternalServerError`: An internal error occurred while processing
+    #     this node.
+    #   @return [String]
+    #
+    # @!attribute [rw] message
+    #   A human-readable message describing the error encountered when
+    #   replacing a node by logical node ID.
+    #   @return [String]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchReplaceClusterNodeLogicalIdsError AWS API Documentation
+    #
+    class BatchReplaceClusterNodeLogicalIdsError < Struct.new(
+      :node_logical_id,
+      :error_code,
+      :message)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # Represents an error encountered when replacing a node in a SageMaker
+    # HyperPod cluster.
+    #
+    # @!attribute [rw] node_id
+    #   The EC2 instance ID of the node that encountered an error during the
+    #   replacement operation.
+    #   @return [String]
+    #
+    # @!attribute [rw] error_code
+    #   The error code associated with the error encountered when replacing
+    #   a node.
+    #
+    #   Possible values:
+    #
+    #   * `InstanceIdNotFound`: The instance does not exist in the specified
+    #     cluster.
+    #
+    #   * `InvalidInstanceStatus`: The instance is in a state that does not
+    #     allow replacement. Wait for the instance to finish any ongoing
+    #     changes before retrying.
+    #
+    #   * `InstanceIdInUse`: Another operation is already in progress for
+    #     this node. Wait for the operation to complete before retrying.
+    #
+    #   * `InternalServerError`: An internal error occurred while processing
+    #     this node.
+    #   @return [String]
+    #
+    # @!attribute [rw] message
+    #   A human-readable message describing the error encountered when
+    #   replacing a node.
+    #   @return [String]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchReplaceClusterNodesError AWS API Documentation
+    #
+    class BatchReplaceClusterNodesError < Struct.new(
+      :node_id,
+      :error_code,
+      :message)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # @!attribute [rw] cluster_name
+    #   The name or Amazon Resource Name (ARN) of the SageMaker HyperPod
+    #   cluster containing the nodes to replace.
+    #   @return [String]
+    #
+    # @!attribute [rw] node_ids
+    #   A list of EC2 instance IDs to replace with new hardware. You can
+    #   specify between 1 and 25 instance IDs.
+    #
+    #   Replace operations destroy all instance volumes (root and
+    #   secondary). Ensure you have backed up any important data before
+    #   proceeding.
+    #
+    #   <note markdown="1"> * Either `NodeIds` or `NodeLogicalIds` must be provided (or both),
+    #     but at least one is required.
+    #
+    #   * Each instance ID must follow the pattern `i-` followed by 17
+    #     hexadecimal characters (for example, `i-0123456789abcdef0`).
+    #
+    #   * For SageMaker HyperPod clusters using the Slurm workload manager,
+    #     you cannot replace instances that are configured as Slurm
+    #     controller nodes.
+    #
+    #    </note>
+    #   @return [Array<String>]
+    #
+    # @!attribute [rw] node_logical_ids
+    #   A list of logical node IDs to replace with new hardware. You can
+    #   specify between 1 and 25 logical node IDs.
+    #
+    #   The `NodeLogicalId` is a unique identifier that persists throughout
+    #   the node's lifecycle and can be used to track nodes that are still
+    #   being provisioned and don't yet have an EC2 instance ID assigned.
+    #
+    #   * Replace operations destroy all instance volumes (root and
+    #     secondary). Ensure you have backed up any important data before
+    #     proceeding.
+    #
+    #   * This parameter is only supported for clusters using `Continuous`
+    #     as the `NodeProvisioningMode`. For clusters using the default
+    #     provisioning mode, use `NodeIds` instead.
+    #
+    #   * Either `NodeIds` or `NodeLogicalIds` must be provided (or both),
+    #     but at least one is required.
+    #   @return [Array<String>]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchReplaceClusterNodesRequest AWS API Documentation
+    #
+    class BatchReplaceClusterNodesRequest < Struct.new(
+      :cluster_name,
+      :node_ids,
+      :node_logical_ids)
+      SENSITIVE = []
+      include Aws::Structure
+    end
+    # @!attribute [rw] successful
+    #   A list of EC2 instance IDs for which the replacement operation was
+    #   successfully initiated.
+    #   @return [Array<String>]
+    #
+    # @!attribute [rw] failed
+    #   A list of errors encountered for EC2 instance IDs that could not be
+    #   replaced. Each error includes the instance ID, an error code, and a
+    #   descriptive message.
+    #   @return [Array<Types::BatchReplaceClusterNodesError>]
+    #
+    # @!attribute [rw] failed_node_logical_ids
+    #   A list of errors encountered for logical node IDs that could not be
+    #   replaced. Each error includes the logical node ID, an error code,
+    #   and a descriptive message. This field is only present when
+    #   `NodeLogicalIds` were provided in the request.
+    #   @return [Array<Types::BatchReplaceClusterNodeLogicalIdsError>]
+    #
+    # @!attribute [rw] successful_node_logical_ids
+    #   A list of logical node IDs for which the replacement operation was
+    #   successfully initiated. This field is only present when
+    #   `NodeLogicalIds` were provided in the request.
+    #   @return [Array<String>]
+    #
+    # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchReplaceClusterNodesResponse AWS API Documentation
+    #
+    class BatchReplaceClusterNodesResponse < Struct.new(
+      :successful,
+      :failed,
+      :failed_node_logical_ids,
+      :successful_node_logical_ids)
+      SENSITIVE = []
+      include Aws::Structure
+    end
     # Input object for the batch transform job.
     #
     # @!attribute [rw] data_captured_destination_s3_uri
@@ -5536,6 +5905,10 @@ module Aws::SageMaker
     #   Contains information about the UltraServer.
     #   @return [Types::UltraServerInfo]
     #
+    # @!attribute [rw] private_dns_hostname
+    #   The private DNS hostname of the SageMaker HyperPod cluster node.
+    #   @return [String]
+    #
     # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterNodeSummary AWS API Documentation
     #
     class ClusterNodeSummary < Struct.new(
@@ -5546,7 +5919,8 @@ module Aws::SageMaker
       :launch_time,
       :last_software_update_time,
       :instance_status,
-      :ultra_server_info)
+      :ultra_server_info,
+      :private_dns_hostname)
       SENSITIVE = []
       include Aws::Structure
     end
@@ -6294,6 +6668,11 @@ module Aws::SageMaker
     #   Accelerators are set to 0.
     #   @return [Float]
     #
+    # @!attribute [rw] accelerator_partition
+    #   The accelerator partition configuration for fractional GPU
+    #   allocation.
+    #   @return [Types::AcceleratorPartitionConfig]
+    #
     # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ComputeQuotaResourceConfig AWS API Documentation
     #
     class ComputeQuotaResourceConfig < Struct.new(
@@ -6301,7 +6680,8 @@ module Aws::SageMaker
       :count,
       :accelerators,
       :v_cpu,
-      :memory_in_gi_b)
+      :memory_in_gi_b,
+      :accelerator_partition)
       SENSITIVE = []
       include Aws::Structure
     end
@@ -20816,7 +21196,7 @@ module Aws::SageMaker
     #
     # @!attribute [rw] target_resources
     #   The target resources (e.g., SageMaker Training Jobs, SageMaker
-    #   HyperPod) that can use this training plan.
+    #   HyperPod, SageMaker Endpoints) that can use this training plan.
     #
     #   Training plans are specific to their target resource.
     #
@@ -20825,6 +21205,10 @@ module Aws::SageMaker
     #
     #   * A training plan for HyperPod clusters can be used exclusively to
     #     provide compute resources to a cluster's instance group.
+    #
+    #   * A training plan for SageMaker endpoints can be used exclusively to
+    #     provide compute resources to SageMaker endpoints for model
+    #     deployment.
     #   @return [Array<String>]
     #
     # @!attribute [rw] reserved_capacity_summaries
@@ -45661,7 +46045,7 @@ module Aws::SageMaker
     #
     # @!attribute [rw] target_resources
     #   The target resources (e.g., SageMaker Training Jobs, SageMaker
-    #   HyperPod) to search for in the offerings.
+    #   HyperPod, SageMaker Endpoints) to search for in the offerings.
     #
     #   Training plans are specific to their target resource.
     #
@@ -45670,6 +46054,10 @@ module Aws::SageMaker
     #
     #   * A training plan for HyperPod clusters can be used exclusively to
     #     provide compute resources to a cluster's instance group.
+    #
+    #   * A training plan for SageMaker endpoints can be used exclusively to
+    #     provide compute resources to SageMaker endpoints for model
+    #     deployment.
     #   @return [Array<String>]
     #
     # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/SearchTrainingPlanOfferingsRequest AWS API Documentation
@@ -48905,7 +49293,7 @@ module Aws::SageMaker
     #
     # @!attribute [rw] target_resources
     #   The target resources (e.g., SageMaker Training Jobs, SageMaker
-    #   HyperPod) for this training plan offering.
+    #   HyperPod, SageMaker Endpoints) for this training plan offering.
     #
     #   Training plans are specific to their target resource.
     #
@@ -48914,6 +49302,10 @@ module Aws::SageMaker
     #
     #   * A training plan for HyperPod clusters can be used exclusively to
     #     provide compute resources to a cluster's instance group.
+    #
+    #   * A training plan for SageMaker endpoints can be used exclusively to
+    #     provide compute resources to SageMaker endpoints for model
+    #     deployment.
     #   @return [Array<String>]
     #
     # @!attribute [rw] requested_start_time_after
@@ -49035,8 +49427,8 @@ module Aws::SageMaker
     #   @return [Integer]
     #
     # @!attribute [rw] target_resources
-    #   The target resources (e.g., training jobs, HyperPod clusters) that
-    #   can use this training plan.
+    #   The target resources (e.g., training jobs, HyperPod clusters,
+    #   Endpoints) that can use this training plan.
     #
     #   Training plans are specific to their target resource.
     #
@@ -49045,6 +49437,10 @@ module Aws::SageMaker
     #
     #   * A training plan for HyperPod clusters can be used exclusively to
     #     provide compute resources to a cluster's instance group.
+    #
+    #   * A training plan for SageMaker endpoints can be used exclusively to
+    #     provide compute resources to SageMaker endpoints for model
+    #     deployment.
     #   @return [Array<String>]
     #
     # @!attribute [rw] reserved_capacity_summaries

data/lib/aws-sdk-sagemaker.rb CHANGED Viewed

@@ -55,7 +55,7 @@ module Aws::SageMaker
   autoload :EndpointProvider, 'aws-sdk-sagemaker/endpoint_provider'
   autoload :Endpoints, 'aws-sdk-sagemaker/endpoints'
-  GEM_VERSION = '1.339.0'
+  GEM_VERSION = '1.340.0'
 end

data/sig/client.rbs CHANGED Viewed

@@ -181,6 +181,36 @@ module Aws
                                         ) -> _BatchDescribeModelPackageResponseSuccess
                                       | (Hash[Symbol, untyped] params, ?Hash[Symbol, untyped] options) -> _BatchDescribeModelPackageResponseSuccess
+      interface _BatchRebootClusterNodesResponseSuccess
+        include ::Seahorse::Client::_ResponseSuccess[Types::BatchRebootClusterNodesResponse]
+        def successful: () -> ::Array[::String]
+        def failed: () -> ::Array[Types::BatchRebootClusterNodesError]
+        def failed_node_logical_ids: () -> ::Array[Types::BatchRebootClusterNodeLogicalIdsError]
+        def successful_node_logical_ids: () -> ::Array[::String]
+      end
+      # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/SageMaker/Client.html#batch_reboot_cluster_nodes-instance_method
+      def batch_reboot_cluster_nodes: (
+                                        cluster_name: ::String,
+                                        ?node_ids: Array[::String],
+                                        ?node_logical_ids: Array[::String]
+                                      ) -> _BatchRebootClusterNodesResponseSuccess
+                                    | (Hash[Symbol, untyped] params, ?Hash[Symbol, untyped] options) -> _BatchRebootClusterNodesResponseSuccess
+      interface _BatchReplaceClusterNodesResponseSuccess
+        include ::Seahorse::Client::_ResponseSuccess[Types::BatchReplaceClusterNodesResponse]
+        def successful: () -> ::Array[::String]
+        def failed: () -> ::Array[Types::BatchReplaceClusterNodesError]
+        def failed_node_logical_ids: () -> ::Array[Types::BatchReplaceClusterNodeLogicalIdsError]
+        def successful_node_logical_ids: () -> ::Array[::String]
+      end
+      # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/SageMaker/Client.html#batch_replace_cluster_nodes-instance_method
+      def batch_replace_cluster_nodes: (
+                                         cluster_name: ::String,
+                                         ?node_ids: Array[::String],
+                                         ?node_logical_ids: Array[::String]
+                                       ) -> _BatchReplaceClusterNodesResponseSuccess
+                                     | (Hash[Symbol, untyped] params, ?Hash[Symbol, untyped] options) -> _BatchReplaceClusterNodesResponseSuccess
       interface _CreateActionResponseSuccess
         include ::Seahorse::Client::_ResponseSuccess[Types::CreateActionResponse]
         def action_arn: () -> ::String
@@ -1013,7 +1043,11 @@ module Aws
                                         count: ::Integer?,
                                         accelerators: ::Integer?,
                                         v_cpu: ::Float?,
-                                        memory_in_gi_b: ::Float?
+                                        memory_in_gi_b: ::Float?,
+                                        accelerator_partition: {
+                                          type: ("mig-1g.5gb" | "mig-1g.10gb" | "mig-1g.18gb" | "mig-1g.20gb" | "mig-1g.23gb" | "mig-1g.35gb" | "mig-1g.45gb" | "mig-1g.47gb" | "mig-2g.10gb" | "mig-2g.20gb" | "mig-2g.35gb" | "mig-2g.45gb" | "mig-2g.47gb" | "mig-3g.20gb" | "mig-3g.40gb" | "mig-3g.71gb" | "mig-3g.90gb" | "mig-3g.93gb" | "mig-4g.20gb" | "mig-4g.40gb" | "mig-4g.71gb" | "mig-4g.90gb" | "mig-4g.93gb" | "mig-7g.40gb" | "mig-7g.80gb" | "mig-7g.141gb" | "mig-7g.180gb" | "mig-7g.186gb"),
+                                          count: ::Integer
+                                        }?
                                       },
                                     ]?,
                                     resource_sharing_config: {
@@ -6680,7 +6714,7 @@ module Aws
         def unhealthy_instance_count: () -> ::Integer
         def available_spare_instance_count: () -> ::Integer
         def total_ultra_server_count: () -> ::Integer
-        def target_resources: () -> ::Array[("training-job" | "hyperpod-cluster")]
+        def target_resources: () -> ::Array[("training-job" | "hyperpod-cluster" | "endpoint")]
         def reserved_capacity_summaries: () -> ::Array[Types::ReservedCapacitySummary]
       end
       # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/SageMaker/Client.html#describe_training_plan-instance_method
@@ -8627,7 +8661,7 @@ module Aws
                                             ?start_time_after: ::Time,
                                             ?end_time_before: ::Time,
                                             duration_hours: ::Integer,
-                                            target_resources: Array[("training-job" | "hyperpod-cluster")]
+                                            target_resources: Array[("training-job" | "hyperpod-cluster" | "endpoint")]
                                           ) -> _SearchTrainingPlanOfferingsResponseSuccess
                                         | (Hash[Symbol, untyped] params, ?Hash[Symbol, untyped] options) -> _SearchTrainingPlanOfferingsResponseSuccess
@@ -9150,7 +9184,11 @@ module Aws
                                         count: ::Integer?,
                                         accelerators: ::Integer?,
                                         v_cpu: ::Float?,
-                                        memory_in_gi_b: ::Float?
+                                        memory_in_gi_b: ::Float?,
+                                        accelerator_partition: {
+                                          type: ("mig-1g.5gb" | "mig-1g.10gb" | "mig-1g.18gb" | "mig-1g.20gb" | "mig-1g.23gb" | "mig-1g.35gb" | "mig-1g.45gb" | "mig-1g.47gb" | "mig-2g.10gb" | "mig-2g.20gb" | "mig-2g.35gb" | "mig-2g.45gb" | "mig-2g.47gb" | "mig-3g.20gb" | "mig-3g.40gb" | "mig-3g.71gb" | "mig-3g.90gb" | "mig-3g.93gb" | "mig-4g.20gb" | "mig-4g.40gb" | "mig-4g.71gb" | "mig-4g.90gb" | "mig-4g.93gb" | "mig-7g.40gb" | "mig-7g.80gb" | "mig-7g.141gb" | "mig-7g.180gb" | "mig-7g.186gb"),
+                                          count: ::Integer
+                                        }?
                                       },
                                     ]?,
                                     resource_sharing_config: {

data/sig/types.rbs CHANGED Viewed

@@ -8,6 +8,12 @@
 module Aws::SageMaker
   module Types
+    class AcceleratorPartitionConfig
+      attr_accessor type: ("mig-1g.5gb" | "mig-1g.10gb" | "mig-1g.18gb" | "mig-1g.20gb" | "mig-1g.23gb" | "mig-1g.35gb" | "mig-1g.45gb" | "mig-1g.47gb" | "mig-2g.10gb" | "mig-2g.20gb" | "mig-2g.35gb" | "mig-2g.45gb" | "mig-2g.47gb" | "mig-3g.20gb" | "mig-3g.40gb" | "mig-3g.71gb" | "mig-3g.90gb" | "mig-3g.93gb" | "mig-4g.20gb" | "mig-4g.40gb" | "mig-4g.71gb" | "mig-4g.90gb" | "mig-4g.93gb" | "mig-7g.40gb" | "mig-7g.80gb" | "mig-7g.141gb" | "mig-7g.180gb" | "mig-7g.186gb")
+      attr_accessor count: ::Integer
+      SENSITIVE: []
+    end
     class ActionSource
       attr_accessor source_uri: ::String
       attr_accessor source_type: ::String
@@ -598,6 +604,64 @@ module Aws::SageMaker
       SENSITIVE: []
     end
+    class BatchRebootClusterNodeLogicalIdsError
+      attr_accessor node_logical_id: ::String
+      attr_accessor error_code: ("InstanceIdNotFound" | "InvalidInstanceStatus" | "InstanceIdInUse" | "InternalServerError")
+      attr_accessor message: ::String
+      SENSITIVE: []
+    end
+    class BatchRebootClusterNodesError
+      attr_accessor node_id: ::String
+      attr_accessor error_code: ("InstanceIdNotFound" | "InvalidInstanceStatus" | "InstanceIdInUse" | "InternalServerError")
+      attr_accessor message: ::String
+      SENSITIVE: []
+    end
+    class BatchRebootClusterNodesRequest
+      attr_accessor cluster_name: ::String
+      attr_accessor node_ids: ::Array[::String]
+      attr_accessor node_logical_ids: ::Array[::String]
+      SENSITIVE: []
+    end
+    class BatchRebootClusterNodesResponse
+      attr_accessor successful: ::Array[::String]
+      attr_accessor failed: ::Array[Types::BatchRebootClusterNodesError]
+      attr_accessor failed_node_logical_ids: ::Array[Types::BatchRebootClusterNodeLogicalIdsError]
+      attr_accessor successful_node_logical_ids: ::Array[::String]
+      SENSITIVE: []
+    end
+    class BatchReplaceClusterNodeLogicalIdsError
+      attr_accessor node_logical_id: ::String
+      attr_accessor error_code: ("InstanceIdNotFound" | "InvalidInstanceStatus" | "InstanceIdInUse" | "InternalServerError")
+      attr_accessor message: ::String
+      SENSITIVE: []
+    end
+    class BatchReplaceClusterNodesError
+      attr_accessor node_id: ::String
+      attr_accessor error_code: ("InstanceIdNotFound" | "InvalidInstanceStatus" | "InstanceIdInUse" | "InternalServerError")
+      attr_accessor message: ::String
+      SENSITIVE: []
+    end
+    class BatchReplaceClusterNodesRequest
+      attr_accessor cluster_name: ::String
+      attr_accessor node_ids: ::Array[::String]
+      attr_accessor node_logical_ids: ::Array[::String]
+      SENSITIVE: []
+    end
+    class BatchReplaceClusterNodesResponse
+      attr_accessor successful: ::Array[::String]
+      attr_accessor failed: ::Array[Types::BatchReplaceClusterNodesError]
+      attr_accessor failed_node_logical_ids: ::Array[Types::BatchReplaceClusterNodeLogicalIdsError]
+      attr_accessor successful_node_logical_ids: ::Array[::String]
+      SENSITIVE: []
+    end
     class BatchTransformInput
       attr_accessor data_captured_destination_s3_uri: ::String
       attr_accessor dataset_format: Types::MonitoringDatasetFormat
@@ -1005,6 +1069,7 @@ module Aws::SageMaker
       attr_accessor last_software_update_time: ::Time
       attr_accessor instance_status: Types::ClusterInstanceStatusDetails
       attr_accessor ultra_server_info: Types::UltraServerInfo
+      attr_accessor private_dns_hostname: ::String
       SENSITIVE: []
     end
@@ -1165,6 +1230,7 @@ module Aws::SageMaker
       attr_accessor accelerators: ::Integer
       attr_accessor v_cpu: ::Float
       attr_accessor memory_in_gi_b: ::Float
+      attr_accessor accelerator_partition: Types::AcceleratorPartitionConfig
       SENSITIVE: []
     end
@@ -4388,7 +4454,7 @@ module Aws::SageMaker
       attr_accessor unhealthy_instance_count: ::Integer
       attr_accessor available_spare_instance_count: ::Integer
       attr_accessor total_ultra_server_count: ::Integer
-      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster")]
+      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster" | "endpoint")]
       attr_accessor reserved_capacity_summaries: ::Array[Types::ReservedCapacitySummary]
       SENSITIVE: []
     end
@@ -9567,7 +9633,7 @@ module Aws::SageMaker
       attr_accessor start_time_after: ::Time
       attr_accessor end_time_before: ::Time
       attr_accessor duration_hours: ::Integer
-      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster")]
+      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster" | "endpoint")]
       SENSITIVE: []
     end
@@ -10224,7 +10290,7 @@ module Aws::SageMaker
     class TrainingPlanOffering
       attr_accessor training_plan_offering_id: ::String
-      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster")]
+      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster" | "endpoint")]
       attr_accessor requested_start_time_after: ::Time
       attr_accessor requested_end_time_before: ::Time
       attr_accessor duration_hours: ::Integer
@@ -10250,7 +10316,7 @@ module Aws::SageMaker
       attr_accessor available_instance_count: ::Integer
       attr_accessor in_use_instance_count: ::Integer
       attr_accessor total_ultra_server_count: ::Integer
-      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster")]
+      attr_accessor target_resources: ::Array[("training-job" | "hyperpod-cluster" | "endpoint")]
       attr_accessor reserved_capacity_summaries: ::Array[Types::ReservedCapacitySummary]
       SENSITIVE: []
     end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: aws-sdk-sagemaker
 version: !ruby/object:Gem::Version
-  version: 1.339.0
+  version: 1.340.0
 platform: ruby
 authors:
 - Amazon Web Services