aws-sdk-sagemaker 1.318.0 → 1.320.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -136,6 +136,27 @@ module Aws::SageMaker
136
136
  include Aws::Structure
137
137
  end
138
138
 
139
+ # Specifies an instance group and the number of nodes to add to it.
140
+ #
141
+ # @!attribute [rw] instance_group_name
142
+ # The name of the instance group to which you want to add nodes.
143
+ # @return [String]
144
+ #
145
+ # @!attribute [rw] increment_target_count_by
146
+ # The number of nodes to add to the specified instance group. The
147
+ # total number of nodes across all instance groups in a single request
148
+ # cannot exceed 50.
149
+ # @return [Integer]
150
+ #
151
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/AddClusterNodeSpecification AWS API Documentation
152
+ #
153
+ class AddClusterNodeSpecification < Struct.new(
154
+ :instance_group_name,
155
+ :increment_target_count_by)
156
+ SENSITIVE = []
157
+ include Aws::Structure
158
+ end
159
+
139
160
  # @!attribute [rw] resource_arn
140
161
  # The Amazon Resource Name (ARN) of the resource that you want to tag.
141
162
  # @return [String]
@@ -172,6 +193,22 @@ module Aws::SageMaker
172
193
  include Aws::Structure
173
194
  end
174
195
 
196
+ # Information about additional Elastic Network Interfaces (ENIs)
197
+ # associated with an instance.
198
+ #
199
+ # @!attribute [rw] efa_enis
200
+ # A list of Elastic Fabric Adapter (EFA) ENIs associated with the
201
+ # instance.
202
+ # @return [Array<String>]
203
+ #
204
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/AdditionalEnis AWS API Documentation
205
+ #
206
+ class AdditionalEnis < Struct.new(
207
+ :efa_enis)
208
+ SENSITIVE = []
209
+ include Aws::Structure
210
+ end
211
+
175
212
  # A structure of additional Inference Specification. Additional
176
213
  # Inference Specification specifies details about inference jobs that
177
214
  # can be run with models based on this model package
@@ -3043,6 +3080,93 @@ module Aws::SageMaker
3043
3080
  include Aws::Structure
3044
3081
  end
3045
3082
 
3083
+ # Information about an error that occurred during the node addition
3084
+ # operation.
3085
+ #
3086
+ # @!attribute [rw] instance_group_name
3087
+ # The name of the instance group for which the error occurred.
3088
+ # @return [String]
3089
+ #
3090
+ # @!attribute [rw] error_code
3091
+ # The error code associated with the failure. Possible values include
3092
+ # `InstanceGroupNotFound` and `InvalidInstanceGroupState`.
3093
+ # @return [String]
3094
+ #
3095
+ # @!attribute [rw] failed_count
3096
+ # The number of nodes that failed to be added to the specified
3097
+ # instance group.
3098
+ # @return [Integer]
3099
+ #
3100
+ # @!attribute [rw] message
3101
+ # A descriptive message providing additional details about the error.
3102
+ # @return [String]
3103
+ #
3104
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchAddClusterNodesError AWS API Documentation
3105
+ #
3106
+ class BatchAddClusterNodesError < Struct.new(
3107
+ :instance_group_name,
3108
+ :error_code,
3109
+ :failed_count,
3110
+ :message)
3111
+ SENSITIVE = []
3112
+ include Aws::Structure
3113
+ end
3114
+
3115
+ # @!attribute [rw] cluster_name
3116
+ # The name of the HyperPod cluster to which you want to add nodes.
3117
+ # @return [String]
3118
+ #
3119
+ # @!attribute [rw] client_token
3120
+ # A unique, case-sensitive identifier that you provide to ensure the
3121
+ # idempotency of the request. This token is valid for 8 hours. If you
3122
+ # retry the request with the same client token within this timeframe
3123
+ # and the same parameters, the API returns the same set of
3124
+ # `NodeLogicalIds` with their latest status.
3125
+ #
3126
+ # **A suitable default value is auto-generated.** You should normally
3127
+ # not need to pass this option.
3128
+ # @return [String]
3129
+ #
3130
+ # @!attribute [rw] nodes_to_add
3131
+ # A list of instance groups and the number of nodes to add to each.
3132
+ # You can specify up to 5 instance groups in a single request, with a
3133
+ # maximum of 50 nodes total across all instance groups.
3134
+ # @return [Array<Types::AddClusterNodeSpecification>]
3135
+ #
3136
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchAddClusterNodesRequest AWS API Documentation
3137
+ #
3138
+ class BatchAddClusterNodesRequest < Struct.new(
3139
+ :cluster_name,
3140
+ :client_token,
3141
+ :nodes_to_add)
3142
+ SENSITIVE = []
3143
+ include Aws::Structure
3144
+ end
3145
+
3146
+ # @!attribute [rw] successful
3147
+ # A list of `NodeLogicalIds` that were successfully added to the
3147
+ # cluster. The `NodeLogicalId` is unique per cluster and does not
3149
+ # change between instance replacements. Each entry includes a
3150
+ # `NodeLogicalId` that can be used to track the node's provisioning
3151
+ # status (with `DescribeClusterNode`), the instance group name, and
3152
+ # the current status of the node.
3153
+ # @return [Array<Types::NodeAdditionResult>]
3154
+ #
3155
+ # @!attribute [rw] failed
3156
+ # A list of errors that occurred during the node addition operation.
3157
+ # Each entry includes the instance group name, error code, number of
3158
+ # failed additions, and an error message.
3159
+ # @return [Array<Types::BatchAddClusterNodesError>]
3160
+ #
3161
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchAddClusterNodesResponse AWS API Documentation
3162
+ #
3163
+ class BatchAddClusterNodesResponse < Struct.new(
3164
+ :successful,
3165
+ :failed)
3166
+ SENSITIVE = []
3167
+ include Aws::Structure
3168
+ end
3169
+
3046
3170
  # Configuration to control how SageMaker captures inference data for
3047
3171
  # batch transform jobs.
3048
3172
  #
@@ -3083,6 +3207,32 @@ module Aws::SageMaker
3083
3207
  include Aws::Structure
3084
3208
  end
3085
3209
 
3210
+ # Information about an error that occurred when attempting to delete a
3211
+ # node identified by its `NodeLogicalId`.
3212
+ #
3213
+ # @!attribute [rw] code
3214
+ # The error code associated with the failure. Possible values include
3215
+ # `NodeLogicalIdNotFound`, `InvalidNodeStatus`, and `InternalError`.
3216
+ # @return [String]
3217
+ #
3218
+ # @!attribute [rw] message
3219
+ # A descriptive message providing additional details about the error.
3220
+ # @return [String]
3221
+ #
3222
+ # @!attribute [rw] node_logical_id
3223
+ # The `NodeLogicalId` of the node that could not be deleted.
3224
+ # @return [String]
3225
+ #
3226
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchDeleteClusterNodeLogicalIdsError AWS API Documentation
3227
+ #
3228
+ class BatchDeleteClusterNodeLogicalIdsError < Struct.new(
3229
+ :code,
3230
+ :message,
3231
+ :node_logical_id)
3232
+ SENSITIVE = []
3233
+ include Aws::Structure
3234
+ end
3235
+
3086
3236
  # Represents an error encountered when deleting a node from a SageMaker
3087
3237
  # HyperPod cluster.
3088
3238
  #
@@ -3136,11 +3286,19 @@ module Aws::SageMaker
3136
3286
  # [1]: http://aws.amazon.com/contact-us/
3137
3287
  # @return [Array<String>]
3138
3288
  #
3289
+ # @!attribute [rw] node_logical_ids
3290
+ # A list of `NodeLogicalIds` identifying the nodes to be deleted. You
3291
+ # can specify up to 50 `NodeLogicalIds`. You must specify either
3292
+ # `NodeLogicalIds`, `InstanceIds`, or both, with a combined maximum of
3293
+ # 50 identifiers.
3294
+ # @return [Array<String>]
3295
+ #
3139
3296
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchDeleteClusterNodesRequest AWS API Documentation
3140
3297
  #
3141
3298
  class BatchDeleteClusterNodesRequest < Struct.new(
3142
3299
  :cluster_name,
3143
- :node_ids)
3300
+ :node_ids,
3301
+ :node_logical_ids)
3144
3302
  SENSITIVE = []
3145
3303
  include Aws::Structure
3146
3304
  end
@@ -3154,11 +3312,23 @@ module Aws::SageMaker
3154
3312
  # cluster.
3155
3313
  # @return [Array<String>]
3156
3314
  #
3315
+ # @!attribute [rw] failed_node_logical_ids
3316
+ # A list of `NodeLogicalIds` that could not be deleted, along with
3317
+ # error information explaining why the deletion failed.
3318
+ # @return [Array<Types::BatchDeleteClusterNodeLogicalIdsError>]
3319
+ #
3320
+ # @!attribute [rw] successful_node_logical_ids
3321
+ # A list of `NodeLogicalIds` that were successfully deleted from the
3322
+ # cluster.
3323
+ # @return [Array<String>]
3324
+ #
3157
3325
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/BatchDeleteClusterNodesResponse AWS API Documentation
3158
3326
  #
3159
3327
  class BatchDeleteClusterNodesResponse < Struct.new(
3160
3328
  :failed,
3161
- :successful)
3329
+ :successful,
3330
+ :failed_node_logical_ids,
3331
+ :successful_node_logical_ids)
3162
3332
  SENSITIVE = []
3163
3333
  include Aws::Structure
3164
3334
  end
@@ -3633,6 +3803,27 @@ module Aws::SageMaker
3633
3803
  include Aws::Structure
3634
3804
  end
3635
3805
 
3806
+ # Information about the Capacity Reservation used by an instance or
3807
+ # instance group.
3808
+ #
3809
+ # @!attribute [rw] arn
3810
+ # The Amazon Resource Name (ARN) of the Capacity Reservation.
3811
+ # @return [String]
3812
+ #
3813
+ # @!attribute [rw] type
3814
+ # The type of Capacity Reservation. Valid values are `ODCR` (On-Demand
3815
+ # Capacity Reservation) or `CRG` (Capacity Reservation Group).
3816
+ # @return [String]
3817
+ #
3818
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/CapacityReservation AWS API Documentation
3819
+ #
3820
+ class CapacityReservation < Struct.new(
3821
+ :arn,
3822
+ :type)
3823
+ SENSITIVE = []
3824
+ include Aws::Structure
3825
+ end
3826
+
3636
3827
  # Specifies the type and size of the endpoint capacity to activate for a
3637
3828
  # blue/green deployment, a rolling deployment, or a rollback strategy.
3638
3829
  # You can specify your batches as either instance count or the overall
@@ -4534,6 +4725,117 @@ module Aws::SageMaker
4534
4725
  include Aws::Structure
4535
4726
  end
4536
4727
 
4728
+ # Detailed information about a specific event in a HyperPod cluster.
4729
+ #
4730
+ # @!attribute [rw] event_id
4731
+ # The unique identifier (UUID) of the event.
4732
+ # @return [String]
4733
+ #
4734
+ # @!attribute [rw] cluster_arn
4735
+ # The Amazon Resource Name (ARN) of the HyperPod cluster associated
4736
+ # with the event.
4737
+ # @return [String]
4738
+ #
4739
+ # @!attribute [rw] cluster_name
4740
+ # The name of the HyperPod cluster associated with the event.
4741
+ # @return [String]
4742
+ #
4743
+ # @!attribute [rw] instance_group_name
4744
+ # The name of the instance group associated with the event, if
4745
+ # applicable.
4746
+ # @return [String]
4747
+ #
4748
+ # @!attribute [rw] instance_id
4749
+ # The EC2 instance ID associated with the event, if applicable.
4750
+ # @return [String]
4751
+ #
4752
+ # @!attribute [rw] resource_type
4753
+ # The type of resource associated with the event. Valid values are
4754
+ # `Cluster`, `InstanceGroup`, or `Instance`.
4755
+ # @return [String]
4756
+ #
4757
+ # @!attribute [rw] event_time
4758
+ # The timestamp when the event occurred.
4759
+ # @return [Time]
4760
+ #
4761
+ # @!attribute [rw] event_details
4762
+ # Additional details about the event, including event-specific
4763
+ # metadata.
4764
+ # @return [Types::EventDetails]
4765
+ #
4766
+ # @!attribute [rw] description
4767
+ # A human-readable description of the event.
4768
+ # @return [String]
4769
+ #
4770
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterEventDetail AWS API Documentation
4771
+ #
4772
+ class ClusterEventDetail < Struct.new(
4773
+ :event_id,
4774
+ :cluster_arn,
4775
+ :cluster_name,
4776
+ :instance_group_name,
4777
+ :instance_id,
4778
+ :resource_type,
4779
+ :event_time,
4780
+ :event_details,
4781
+ :description)
4782
+ SENSITIVE = []
4783
+ include Aws::Structure
4784
+ end
4785
+
4786
+ # A summary of an event in a HyperPod cluster.
4787
+ #
4788
+ # @!attribute [rw] event_id
4789
+ # The unique identifier (UUID) of the event.
4790
+ # @return [String]
4791
+ #
4792
+ # @!attribute [rw] cluster_arn
4793
+ # The Amazon Resource Name (ARN) of the HyperPod cluster associated
4794
+ # with the event.
4795
+ # @return [String]
4796
+ #
4797
+ # @!attribute [rw] cluster_name
4798
+ # The name of the HyperPod cluster associated with the event.
4799
+ # @return [String]
4800
+ #
4801
+ # @!attribute [rw] instance_group_name
4802
+ # The name of the instance group associated with the event, if
4803
+ # applicable.
4804
+ # @return [String]
4805
+ #
4806
+ # @!attribute [rw] instance_id
4807
+ # The Amazon Elastic Compute Cloud (EC2) instance ID associated with
4808
+ # the event, if applicable.
4809
+ # @return [String]
4810
+ #
4811
+ # @!attribute [rw] resource_type
4812
+ # The type of resource associated with the event. Valid values are
4813
+ # `Cluster`, `InstanceGroup`, or `Instance`.
4814
+ # @return [String]
4815
+ #
4816
+ # @!attribute [rw] event_time
4817
+ # The timestamp when the event occurred.
4818
+ # @return [Time]
4819
+ #
4820
+ # @!attribute [rw] description
4821
+ # A brief, human-readable description of the event.
4822
+ # @return [String]
4823
+ #
4824
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterEventSummary AWS API Documentation
4825
+ #
4826
+ class ClusterEventSummary < Struct.new(
4827
+ :event_id,
4828
+ :cluster_arn,
4829
+ :cluster_name,
4830
+ :instance_group_name,
4831
+ :instance_id,
4832
+ :resource_type,
4833
+ :event_time,
4834
+ :description)
4835
+ SENSITIVE = []
4836
+ include Aws::Structure
4837
+ end
4838
+
4537
4839
  # Details of an instance group in a SageMaker HyperPod cluster.
4538
4840
  #
4539
4841
  # @!attribute [rw] current_count
@@ -4630,6 +4932,16 @@ module Aws::SageMaker
4630
4932
  # updating the AMI.
4631
4933
  # @return [Types::ScheduledUpdateConfig]
4632
4934
  #
4935
+ # @!attribute [rw] current_image_id
4936
+ # The ID of the Amazon Machine Image (AMI) currently in use by the
4937
+ # instance group.
4938
+ # @return [String]
4939
+ #
4940
+ # @!attribute [rw] desired_image_id
4941
+ # The ID of the Amazon Machine Image (AMI) desired for the instance
4942
+ # group.
4943
+ # @return [String]
4944
+ #
4633
4945
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterInstanceGroupDetails AWS API Documentation
4634
4946
  #
4635
4947
  class ClusterInstanceGroupDetails < Struct.new(
@@ -4646,7 +4958,9 @@ module Aws::SageMaker
4646
4958
  :training_plan_arn,
4647
4959
  :training_plan_status,
4648
4960
  :override_vpc_config,
4649
- :scheduled_update_config)
4961
+ :scheduled_update_config,
4962
+ :current_image_id,
4963
+ :desired_image_id)
4650
4964
  SENSITIVE = []
4651
4965
  include Aws::Structure
4652
4966
  end
@@ -4756,6 +5070,33 @@ module Aws::SageMaker
4756
5070
  # update the AMI.
4757
5071
  # @return [Types::ScheduledUpdateConfig]
4758
5072
  #
5073
+ # @!attribute [rw] image_id
5074
+ # When configuring your HyperPod cluster, you can specify an image ID
5075
+ # using one of the following options:
5076
+ #
5077
+ # * `HyperPodPublicAmiId`: Use a HyperPod public AMI
5078
+ #
5079
+ # * `CustomAmiId`: Use your custom AMI
5080
+ #
5081
+ # * `default`: Use the default latest system image
5082
+ #
5083
+ # If you choose to use a custom AMI (`CustomAmiId`), ensure it meets
5084
+ # the following requirements:
5085
+ #
5086
+ # * Encryption: The custom AMI must be unencrypted.
5087
+ #
5088
+ # * Ownership: The custom AMI must be owned by the same Amazon Web
5089
+ # Services account that is creating the HyperPod cluster.
5090
+ #
5091
+ # * Volume support: Only the primary AMI snapshot volume is supported;
5092
+ # additional AMI volumes are not supported.
5093
+ #
5094
+ # When updating the instance group's AMI through the
5095
+ # `UpdateClusterSoftware` operation, if an instance group uses a
5096
+ # custom AMI, you must provide an `ImageId` or use the default as
5097
+ # input.
5098
+ # @return [String]
5099
+ #
4759
5100
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterInstanceGroupSpecification AWS API Documentation
4760
5101
  #
4761
5102
  class ClusterInstanceGroupSpecification < Struct.new(
@@ -4769,7 +5110,8 @@ module Aws::SageMaker
4769
5110
  :on_start_deep_health_checks,
4770
5111
  :training_plan_arn,
4771
5112
  :override_vpc_config,
4772
- :scheduled_update_config)
5113
+ :scheduled_update_config,
5114
+ :image_id)
4773
5115
  SENSITIVE = []
4774
5116
  include Aws::Structure
4775
5117
  end
@@ -4879,6 +5221,37 @@ module Aws::SageMaker
4879
5221
  include Aws::Structure
4880
5222
  end
4881
5223
 
5224
+ # Metadata information about a HyperPod cluster showing information
5225
+ # about the cluster level operations, such as creating, updating, and
5226
+ # deleting.
5227
+ #
5228
+ # @!attribute [rw] failure_message
5229
+ # An error message describing why the cluster level operation (such as
5230
+ # creating, updating, or deleting) failed.
5231
+ # @return [String]
5232
+ #
5233
+ # @!attribute [rw] eks_role_access_entries
5234
+ # A list of Amazon EKS IAM role ARNs associated with the cluster. This
5235
+ # is created by HyperPod on your behalf and only applies for EKS
5236
+ # orchestrated clusters.
5237
+ # @return [Array<String>]
5238
+ #
5239
+ # @!attribute [rw] slr_access_entry
5240
+ # The Service-Linked Role (SLR) associated with the cluster. This is
5241
+ # created by HyperPod on your behalf and only applies for EKS
5242
+ # orchestrated clusters.
5243
+ # @return [String]
5244
+ #
5245
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterMetadata AWS API Documentation
5246
+ #
5247
+ class ClusterMetadata < Struct.new(
5248
+ :failure_message,
5249
+ :eks_role_access_entries,
5250
+ :slr_access_entry)
5251
+ SENSITIVE = []
5252
+ include Aws::Structure
5253
+ end
5254
+
4882
5255
  # Details of an instance (also called a *node* interchangeably) in a
4883
5256
  # SageMaker HyperPod cluster.
4884
5257
  #
@@ -4890,6 +5263,13 @@ module Aws::SageMaker
4890
5263
  # The ID of the instance.
4891
5264
  # @return [String]
4892
5265
  #
5266
+ # @!attribute [rw] node_logical_id
5267
+ # A unique identifier for the node that persists throughout its
5268
+ # lifecycle, from provisioning request to termination. This identifier
5269
+ # can be used to track the node even before it has an assigned
5270
+ # `InstanceId`.
5271
+ # @return [String]
5272
+ #
4893
5273
  # @!attribute [rw] instance_status
4894
5274
  # The status of the instance.
4895
5275
  # @return [Types::ClusterInstanceStatusDetails]
@@ -4947,11 +5327,25 @@ module Aws::SageMaker
4947
5327
  # The placement details of the SageMaker HyperPod cluster node.
4948
5328
  # @return [Types::ClusterInstancePlacement]
4949
5329
  #
5330
+ # @!attribute [rw] current_image_id
5331
+ # The ID of the Amazon Machine Image (AMI) currently in use by the
5332
+ # node.
5333
+ # @return [String]
5334
+ #
5335
+ # @!attribute [rw] desired_image_id
5336
+ # The ID of the Amazon Machine Image (AMI) desired for the node.
5337
+ # @return [String]
5338
+ #
5339
+ # @!attribute [rw] ultra_server_info
5340
+ # Contains information about the UltraServer.
5341
+ # @return [Types::UltraServerInfo]
5342
+ #
4950
5343
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterNodeDetails AWS API Documentation
4951
5344
  #
4952
5345
  class ClusterNodeDetails < Struct.new(
4953
5346
  :instance_group_name,
4954
5347
  :instance_id,
5348
+ :node_logical_id,
4955
5349
  :instance_status,
4956
5350
  :instance_type,
4957
5351
  :launch_time,
@@ -4963,7 +5357,10 @@ module Aws::SageMaker
4963
5357
  :private_primary_ip,
4964
5358
  :private_primary_ipv_6,
4965
5359
  :private_dns_hostname,
4966
- :placement)
5360
+ :placement,
5361
+ :current_image_id,
5362
+ :desired_image_id,
5363
+ :ultra_server_info)
4967
5364
  SENSITIVE = []
4968
5365
  include Aws::Structure
4969
5366
  end
@@ -4979,6 +5376,15 @@ module Aws::SageMaker
4979
5376
  # The ID of the instance.
4980
5377
  # @return [String]
4981
5378
  #
5379
+ # @!attribute [rw] node_logical_id
5380
+ # A unique identifier for the node that persists throughout its
5381
+ # lifecycle, from provisioning request to termination. This identifier
5382
+ # can be used to track the node even before it has an assigned
5383
+ # `InstanceId`. This field is only included when
5384
+ # `IncludeNodeLogicalIds` is set to `True` in the `ListClusterNodes`
5385
+ # request.
5386
+ # @return [String]
5387
+ #
4982
5388
  # @!attribute [rw] instance_type
4983
5389
  # The type of the instance.
4984
5390
  # @return [String]
@@ -4996,15 +5402,21 @@ module Aws::SageMaker
4996
5402
  # The status of the instance.
4997
5403
  # @return [Types::ClusterInstanceStatusDetails]
4998
5404
  #
5405
+ # @!attribute [rw] ultra_server_info
5406
+ # Contains information about the UltraServer.
5407
+ # @return [Types::UltraServerInfo]
5408
+ #
4999
5409
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ClusterNodeSummary AWS API Documentation
5000
5410
  #
5001
5411
  class ClusterNodeSummary < Struct.new(
5002
5412
  :instance_group_name,
5003
5413
  :instance_id,
5414
+ :node_logical_id,
5004
5415
  :instance_type,
5005
5416
  :launch_time,
5006
5417
  :last_software_update_time,
5007
- :instance_status)
5418
+ :instance_status,
5419
+ :ultra_server_info)
5008
5420
  SENSITIVE = []
5009
5421
  include Aws::Structure
5010
5422
  end
@@ -6822,7 +7234,7 @@ module Aws::SageMaker
6822
7234
  # @!attribute [rw] orchestrator
6823
7235
  # The type of orchestrator to use for the SageMaker HyperPod cluster.
6824
7236
  # Currently, the only supported value is `"eks"`, which is to use an
6825
- # Amazon Elastic Kubernetes Service (EKS) cluster as the orchestrator.
7237
+ # Amazon Elastic Kubernetes Service cluster as the orchestrator.
6826
7238
  # @return [Types::ClusterOrchestrator]
6827
7239
  #
6828
7240
  # @!attribute [rw] node_recovery
@@ -6833,6 +7245,21 @@ module Aws::SageMaker
6833
7245
  # cluster instances.
6834
7246
  # @return [String]
6835
7247
  #
7248
+ # @!attribute [rw] node_provisioning_mode
7249
+ # The mode for provisioning nodes in the cluster. You can specify the
7250
+ # following modes:
7251
+ #
7252
+ # * **Continuous**: Scaling behavior that enables 1) concurrent
7253
+ # operation execution within instance groups, 2) continuous retry
7254
+ # mechanisms for failed operations, 3) enhanced customer visibility
7255
+ # into cluster events through detailed event streams, 4) partial
7256
+ # provisioning capabilities. Your clusters and instance groups
7257
+ # remain `InService` while scaling. This mode is only supported for
7258
+ # EKS orchestrated clusters.
7259
+ #
7260
+ # ^
7261
+ # @return [String]
7262
+ #
6836
7263
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/CreateClusterRequest AWS API Documentation
6837
7264
  #
6838
7265
  class CreateClusterRequest < Struct.new(
@@ -6842,7 +7269,8 @@ module Aws::SageMaker
6842
7269
  :vpc_config,
6843
7270
  :tags,
6844
7271
  :orchestrator,
6845
- :node_recovery)
7272
+ :node_recovery,
7273
+ :node_provisioning_mode)
6846
7274
  SENSITIVE = []
6847
7275
  include Aws::Structure
6848
7276
  end
@@ -11200,6 +11628,11 @@ module Aws::SageMaker
11200
11628
  # creating this plan.
11201
11629
  # @return [String]
11202
11630
  #
11631
+ # @!attribute [rw] spare_instance_count_per_ultra_server
11632
+ # Number of spare instances to reserve per UltraServer for enhanced
11633
+ # resiliency. Default is 1.
11634
+ # @return [Integer]
11635
+ #
11203
11636
  # @!attribute [rw] tags
11204
11637
  # An array of key-value pairs to apply to this training plan.
11205
11638
  # @return [Array<Types::Tag>]
@@ -11209,6 +11642,7 @@ module Aws::SageMaker
11209
11642
  class CreateTrainingPlanRequest < Struct.new(
11210
11643
  :training_plan_name,
11211
11644
  :training_plan_offering_id,
11645
+ :spare_instance_count_per_ultra_server,
11212
11646
  :tags)
11213
11647
  SENSITIVE = []
11214
11648
  include Aws::Structure
@@ -14391,6 +14825,39 @@ module Aws::SageMaker
14391
14825
  include Aws::Structure
14392
14826
  end
14393
14827
 
14828
+ # @!attribute [rw] event_id
14829
+ # The unique identifier (UUID) of the event to describe. This ID can
14830
+ # be obtained from the `ListClusterEvents` operation.
14831
+ # @return [String]
14832
+ #
14833
+ # @!attribute [rw] cluster_name
14834
+ # The name or Amazon Resource Name (ARN) of the HyperPod cluster
14835
+ # associated with the event.
14836
+ # @return [String]
14837
+ #
14838
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/DescribeClusterEventRequest AWS API Documentation
14839
+ #
14840
+ class DescribeClusterEventRequest < Struct.new(
14841
+ :event_id,
14842
+ :cluster_name)
14843
+ SENSITIVE = []
14844
+ include Aws::Structure
14845
+ end
14846
+
14847
+ # @!attribute [rw] event_details
14848
+ # Detailed information about the requested cluster event, including
14849
+ # event metadata for various resource types such as `Cluster`,
14850
+ # `InstanceGroup`, `Instance`, and their associated attributes.
14851
+ # @return [Types::ClusterEventDetail]
14852
+ #
14853
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/DescribeClusterEventResponse AWS API Documentation
14854
+ #
14855
+ class DescribeClusterEventResponse < Struct.new(
14856
+ :event_details)
14857
+ SENSITIVE = []
14858
+ include Aws::Structure
14859
+ end
14860
+
14394
14861
  # @!attribute [rw] cluster_name
14395
14862
  # The string name or the Amazon Resource Name (ARN) of the SageMaker
14396
14863
  # HyperPod cluster in which the node is.
@@ -14400,11 +14867,19 @@ module Aws::SageMaker
14400
14867
  # The ID of the SageMaker HyperPod cluster node.
14401
14868
  # @return [String]
14402
14869
  #
14870
+ # @!attribute [rw] node_logical_id
14871
+ # The logical identifier of the node to describe. You can specify
14872
+ # either `NodeLogicalId` or `InstanceId`, but not both.
14873
+ # `NodeLogicalId` can be used to describe nodes that are still being
14874
+ # provisioned and don't yet have an `InstanceId` assigned.
14875
+ # @return [String]
14876
+ #
14403
14877
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/DescribeClusterNodeRequest AWS API Documentation
14404
14878
  #
14405
14879
  class DescribeClusterNodeRequest < Struct.new(
14406
14880
  :cluster_name,
14407
- :node_id)
14881
+ :node_id,
14882
+ :node_logical_id)
14408
14883
  SENSITIVE = []
14409
14884
  include Aws::Structure
14410
14885
  end
@@ -14484,6 +14959,10 @@ module Aws::SageMaker
14484
14959
  # cluster.
14485
14960
  # @return [String]
14486
14961
  #
14962
+ # @!attribute [rw] node_provisioning_mode
14963
+ # The mode used for provisioning nodes in the cluster.
14964
+ # @return [String]
14965
+ #
14487
14966
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/DescribeClusterResponse AWS API Documentation
14488
14967
  #
14489
14968
  class DescribeClusterResponse < Struct.new(
@@ -14496,7 +14975,8 @@ module Aws::SageMaker
14496
14975
  :restricted_instance_groups,
14497
14976
  :vpc_config,
14498
14977
  :orchestrator,
14499
- :node_recovery)
14978
+ :node_recovery,
14979
+ :node_provisioning_mode)
14500
14980
  SENSITIVE = []
14501
14981
  include Aws::Structure
14502
14982
  end
@@ -19216,6 +19696,94 @@ module Aws::SageMaker
19216
19696
  include Aws::Structure
19217
19697
  end
19218
19698
 
19699
+ # @!attribute [rw] reserved_capacity_arn
19700
+ # ARN of the reserved capacity to describe.
19701
+ # @return [String]
19702
+ #
19703
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/DescribeReservedCapacityRequest AWS API Documentation
19704
+ #
19705
+ class DescribeReservedCapacityRequest < Struct.new(
19706
+ :reserved_capacity_arn)
19707
+ SENSITIVE = []
19708
+ include Aws::Structure
19709
+ end
19710
+
19711
+ # @!attribute [rw] reserved_capacity_arn
19712
+ # ARN of the reserved capacity.
19713
+ # @return [String]
19714
+ #
19715
+ # @!attribute [rw] reserved_capacity_type
19716
+ # The type of reserved capacity.
19717
+ # @return [String]
19718
+ #
19719
+ # @!attribute [rw] status
19720
+ # The current status of the reserved capacity.
19721
+ # @return [String]
19722
+ #
19723
+ # @!attribute [rw] availability_zone
19724
+ # The Availability Zone where the reserved capacity is provisioned.
19725
+ # @return [String]
19726
+ #
19727
+ # @!attribute [rw] duration_hours
19728
+ # The total duration of the reserved capacity in hours.
19729
+ # @return [Integer]
19730
+ #
19731
+ # @!attribute [rw] duration_minutes
19732
+ # The number of minutes for the duration of the reserved capacity. For
19733
+ # example, if a reserved capacity starts at 08:55 and ends at 11:30,
19734
+ # the minutes field would be 35.
19735
+ # @return [Integer]
19736
+ #
19737
+ # @!attribute [rw] start_time
19738
+ # The timestamp when the reserved capacity becomes active.
19739
+ # @return [Time]
19740
+ #
19741
+ # @!attribute [rw] end_time
19742
+ # The timestamp when the reserved capacity expires.
19743
+ # @return [Time]
19744
+ #
19745
+ # @!attribute [rw] instance_type
19746
+ # The Amazon EC2 instance type used in the reserved capacity.
19747
+ # @return [String]
19748
+ #
19749
+ # @!attribute [rw] total_instance_count
19750
+ # The total number of instances allocated to this reserved capacity.
19751
+ # @return [Integer]
19752
+ #
19753
+ # @!attribute [rw] available_instance_count
19754
+ # The number of instances currently available for use in this reserved
19755
+ # capacity.
19756
+ # @return [Integer]
19757
+ #
19758
+ # @!attribute [rw] in_use_instance_count
19759
+ # The number of instances currently in use from this reserved
19760
+ # capacity.
19761
+ # @return [Integer]
19762
+ #
19763
+ # @!attribute [rw] ultra_server_summary
19764
+ # A summary of the UltraServer associated with this reserved capacity.
19765
+ # @return [Types::UltraServerSummary]
19766
+ #
19767
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/DescribeReservedCapacityResponse AWS API Documentation
19768
+ #
19769
+ class DescribeReservedCapacityResponse < Struct.new(
19770
+ :reserved_capacity_arn,
19771
+ :reserved_capacity_type,
19772
+ :status,
19773
+ :availability_zone,
19774
+ :duration_hours,
19775
+ :duration_minutes,
19776
+ :start_time,
19777
+ :end_time,
19778
+ :instance_type,
19779
+ :total_instance_count,
19780
+ :available_instance_count,
19781
+ :in_use_instance_count,
19782
+ :ultra_server_summary)
19783
+ SENSITIVE = []
19784
+ include Aws::Structure
19785
+ end
19786
+
19219
19787
  # @!attribute [rw] domain_id
19220
19788
  # The ID of the associated domain.
19221
19789
  # @return [String]
@@ -19883,6 +20451,19 @@ module Aws::SageMaker
19883
20451
  # The number of instances currently in use from this training plan.
19884
20452
  # @return [Integer]
19885
20453
  #
20454
+ # @!attribute [rw] unhealthy_instance_count
20455
+ # The number of instances in the training plan that are currently in
20456
+ # an unhealthy state.
20457
+ # @return [Integer]
20458
+ #
20459
+ # @!attribute [rw] available_spare_instance_count
20460
+ # The number of available spare instances in the training plan.
20461
+ # @return [Integer]
20462
+ #
20463
+ # @!attribute [rw] total_ultra_server_count
20464
+ # The total number of UltraServers reserved to this training plan.
20465
+ # @return [Integer]
20466
+ #
19886
20467
  # @!attribute [rw] target_resources
19887
20468
  # The target resources (e.g., SageMaker Training Jobs, SageMaker
19888
20469
  # HyperPod) that can use this training plan.
@@ -19917,6 +20498,9 @@ module Aws::SageMaker
19917
20498
  :total_instance_count,
19918
20499
  :available_instance_count,
19919
20500
  :in_use_instance_count,
20501
+ :unhealthy_instance_count,
20502
+ :available_spare_instance_count,
20503
+ :total_ultra_server_count,
19920
20504
  :target_resources,
19921
20505
  :reserved_capacity_summaries)
19922
20506
  SENSITIVE = []
@@ -22293,6 +22877,61 @@ module Aws::SageMaker
22293
22877
  include Aws::Structure
22294
22878
  end
22295
22879
 
22880
+ # Detailed information about a specific event, including event metadata.
22881
+ #
22882
+ # @!attribute [rw] event_metadata
22883
+ # Metadata specific to the event, which may include information about
22884
+ # the cluster, instance group, or instance involved.
22885
+ # @return [Types::EventMetadata]
22886
+ #
22887
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/EventDetails AWS API Documentation
22888
+ #
22889
+ class EventDetails < Struct.new(
22890
+ :event_metadata)
22891
+ SENSITIVE = []
22892
+ include Aws::Structure
22893
+ end
22894
+
22895
+ # Metadata associated with a cluster event, which may include details
22896
+ # about various resource types.
22897
+ #
22898
+ # @note EventMetadata is a union - when returned from an API call exactly one value will be set and the returned type will be a subclass of EventMetadata corresponding to the set member.
22899
+ #
22900
+ # @!attribute [rw] cluster
22901
+ # Metadata specific to cluster-level events.
22902
+ # @return [Types::ClusterMetadata]
22903
+ #
22904
+ # @!attribute [rw] instance_group
22905
+ # Metadata specific to instance group-level events.
22906
+ # @return [Types::InstanceGroupMetadata]
22907
+ #
22908
+ # @!attribute [rw] instance_group_scaling
22909
+ # Metadata related to instance group scaling events.
22910
+ # @return [Types::InstanceGroupScalingMetadata]
22911
+ #
22912
+ # @!attribute [rw] instance
22913
+ # Metadata specific to instance-level events.
22914
+ # @return [Types::InstanceMetadata]
22915
+ #
22916
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/EventMetadata AWS API Documentation
22917
+ #
22918
+ class EventMetadata < Struct.new(
22919
+ :cluster,
22920
+ :instance_group,
22921
+ :instance_group_scaling,
22922
+ :instance,
22923
+ :unknown)
22924
+ SENSITIVE = []
22925
+ include Aws::Structure
22926
+ include Aws::Structure::Union
22927
+
22928
+ class Cluster < EventMetadata; end
22929
+ class InstanceGroup < EventMetadata; end
22930
+ class InstanceGroupScaling < EventMetadata; end
22931
+ class Instance < EventMetadata; end
22932
+ class Unknown < EventMetadata; end
22933
+ end
22934
+
22296
22935
  # The properties of an experiment as returned by the [Search][1] API.
22297
22936
  # For information about experiments, see the [CreateExperiment][2] API.
22298
22937
  #
@@ -27531,6 +28170,116 @@ module Aws::SageMaker
27531
28170
  include Aws::Structure
27532
28171
  end
27533
28172
 
28173
+ # Metadata information about an instance group in a HyperPod cluster.
28174
+ #
28175
+ # @!attribute [rw] failure_message
28176
+ # An error message describing why the instance group level operation
28177
+ # (such as creating, scaling, or deleting) failed.
28178
+ # @return [String]
28179
+ #
28180
+ # @!attribute [rw] availability_zone_id
28181
+ # The ID of the Availability Zone where the instance group is located.
28182
+ # @return [String]
28183
+ #
28184
+ # @!attribute [rw] capacity_reservation
28185
+ # Information about the Capacity Reservation used by the instance
28186
+ # group.
28187
+ # @return [Types::CapacityReservation]
28188
+ #
28189
+ # @!attribute [rw] subnet_id
28190
+ # The ID of the subnet where the instance group is located.
28191
+ # @return [String]
28192
+ #
28193
+ # @!attribute [rw] security_group_ids
28194
+ # A list of security group IDs associated with the instance group.
28195
+ # @return [Array<String>]
28196
+ #
28197
+ # @!attribute [rw] ami_override
28198
+ # If you use a custom Amazon Machine Image (AMI) for the instance
28199
+ # group, this field shows the ID of the custom AMI.
28200
+ # @return [String]
28201
+ #
28202
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InstanceGroupMetadata AWS API Documentation
28203
+ #
28204
+ class InstanceGroupMetadata < Struct.new(
28205
+ :failure_message,
28206
+ :availability_zone_id,
28207
+ :capacity_reservation,
28208
+ :subnet_id,
28209
+ :security_group_ids,
28210
+ :ami_override)
28211
+ SENSITIVE = []
28212
+ include Aws::Structure
28213
+ end
28214
+
28215
+ # Metadata information about scaling operations for an instance group.
28216
+ #
28217
+ # @!attribute [rw] instance_count
28218
+ # The current number of instances in the group.
28219
+ # @return [Integer]
28220
+ #
28221
+ # @!attribute [rw] target_count
28222
+ # The desired number of instances for the group after scaling.
28223
+ # @return [Integer]
28224
+ #
28225
+ # @!attribute [rw] failure_message
28226
+ # An error message describing why the scaling operation failed, if
28227
+ # applicable.
28228
+ # @return [String]
28229
+ #
28230
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InstanceGroupScalingMetadata AWS API Documentation
28231
+ #
28232
+ class InstanceGroupScalingMetadata < Struct.new(
28233
+ :instance_count,
28234
+ :target_count,
28235
+ :failure_message)
28236
+ SENSITIVE = []
28237
+ include Aws::Structure
28238
+ end
28239
+
28240
+ # Metadata information about an instance in a HyperPod cluster.
28241
+ #
28242
+ # @!attribute [rw] customer_eni
28243
+ # The ID of the customer-managed Elastic Network Interface (ENI)
28244
+ # associated with the instance.
28245
+ # @return [String]
28246
+ #
28247
+ # @!attribute [rw] additional_enis
28248
+ # Information about additional Elastic Network Interfaces (ENIs)
28249
+ # associated with the instance.
28250
+ # @return [Types::AdditionalEnis]
28251
+ #
28252
+ # @!attribute [rw] capacity_reservation
28253
+ # Information about the Capacity Reservation used by the instance.
28254
+ # @return [Types::CapacityReservation]
28255
+ #
28256
+ # @!attribute [rw] failure_message
28257
+ # An error message describing why the instance creation or update
28258
+ # failed, if applicable.
28259
+ # @return [String]
28260
+ #
28261
+ # @!attribute [rw] lcs_execution_state
28262
+ # The execution state of the Lifecycle Script (LCS) for the instance.
28263
+ # @return [String]
28264
+ #
28265
+ # @!attribute [rw] node_logical_id
28266
+ # The unique logical identifier of the node within the cluster. The ID
28267
+ # used here is the same identifier as in the `BatchAddClusterNodes` API.
28268
+ # @return [String]
28269
+ #
28270
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InstanceMetadata AWS API Documentation
28271
+ #
28272
+ class InstanceMetadata < Struct.new(
28273
+ :customer_eni,
28274
+ :additional_enis,
28275
+ :capacity_reservation,
28276
+ :failure_message,
28277
+ :lcs_execution_state,
28278
+ :node_logical_id)
28279
+ SENSITIVE = []
28280
+ include Aws::Structure
28281
+ end
28282
+
27534
28283
  # Information on the IMDS configuration of the notebook instance
27535
28284
  #
27536
28285
  # @!attribute [rw] minimum_instance_metadata_service_version
@@ -27549,6 +28298,30 @@ module Aws::SageMaker
27549
28298
  include Aws::Structure
27550
28299
  end
27551
28300
 
28301
+ # Configuration for how instances are placed and allocated within
28302
+ # UltraServers. This is only applicable for UltraServer capacity.
28303
+ #
28304
+ # @!attribute [rw] enable_multiple_jobs
28305
+ # If set to true, allows multiple jobs to share the same UltraServer
28306
+ # instances. If set to false, ensures this job's instances are placed
28307
+ # on an UltraServer exclusively, with no other jobs sharing the same
28308
+ # UltraServer. Default is false.
28309
+ # @return [Boolean]
28310
+ #
28311
+ # @!attribute [rw] placement_specifications
28312
+ # A list of specifications for how instances should be placed on
28313
+ # specific UltraServers. A maximum of 10 items is supported.
28314
+ # @return [Array<Types::PlacementSpecification>]
28315
+ #
28316
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InstancePlacementConfig AWS API Documentation
28317
+ #
28318
+ class InstancePlacementConfig < Struct.new(
28319
+ :enable_multiple_jobs,
28320
+ :placement_specifications)
28321
+ SENSITIVE = []
28322
+ include Aws::Structure
28323
+ end
28324
+
27552
28325
  # For a hyperparameter of the integer type, specifies the range that a
27553
28326
  # hyperparameter tuning job searches.
27554
28327
  #
@@ -29072,6 +29845,91 @@ module Aws::SageMaker
29072
29845
  include Aws::Structure
29073
29846
  end
29074
29847
 
29848
+ # @!attribute [rw] cluster_name
29849
+ # The name or Amazon Resource Name (ARN) of the HyperPod cluster for
29850
+ # which to list events.
29851
+ # @return [String]
29852
+ #
29853
+ # @!attribute [rw] instance_group_name
29854
+ # The name of the instance group to filter events. If specified, only
29855
+ # events related to this instance group are returned.
29856
+ # @return [String]
29857
+ #
29858
+ # @!attribute [rw] node_id
29859
+ # The EC2 instance ID to filter events. If specified, only events
29860
+ # related to this instance are returned.
29861
+ # @return [String]
29862
+ #
29863
+ # @!attribute [rw] event_time_after
29864
+ # The start of the time range for filtering events. Only events that
29865
+ # occurred after this time are included in the results.
29866
+ # @return [Time]
29867
+ #
29868
+ # @!attribute [rw] event_time_before
29869
+ # The end of the time range for filtering events. Only events that
29870
+ # occurred before this time are included in the results.
29871
+ # @return [Time]
29872
+ #
29873
+ # @!attribute [rw] sort_by
29874
+ # The field to use for sorting the event list. Currently, the only
29875
+ # supported value is `EventTime`.
29876
+ # @return [String]
29877
+ #
29878
+ # @!attribute [rw] sort_order
29879
+ # The order in which to sort the results. Valid values are `Ascending`
29880
+ # or `Descending` (the default is `Descending`).
29881
+ # @return [String]
29882
+ #
29883
+ # @!attribute [rw] resource_type
29884
+ # The type of resource for which to filter events. Valid values are
29885
+ # `Cluster`, `InstanceGroup`, or `Instance`.
29886
+ # @return [String]
29887
+ #
29888
+ # @!attribute [rw] max_results
29889
+ # The maximum number of events to return in the response. Valid range
29890
+ # is 1 to 100.
29891
+ # @return [Integer]
29892
+ #
29893
+ # @!attribute [rw] next_token
29894
+ # A token to retrieve the next set of results. This token is obtained
29895
+ # from the output of a previous `ListClusterEvents` call.
29896
+ # @return [String]
29897
+ #
29898
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ListClusterEventsRequest AWS API Documentation
29899
+ #
29900
+ class ListClusterEventsRequest < Struct.new(
29901
+ :cluster_name,
29902
+ :instance_group_name,
29903
+ :node_id,
29904
+ :event_time_after,
29905
+ :event_time_before,
29906
+ :sort_by,
29907
+ :sort_order,
29908
+ :resource_type,
29909
+ :max_results,
29910
+ :next_token)
29911
+ SENSITIVE = []
29912
+ include Aws::Structure
29913
+ end
29914
+
29915
+ # @!attribute [rw] next_token
29916
+ # A token to retrieve the next set of results. Include this token in
29917
+ # subsequent `ListClusterEvents` calls to fetch more events.
29918
+ # @return [String]
29919
+ #
29920
+ # @!attribute [rw] events
29921
+ # A list of event summaries matching the specified criteria.
29922
+ # @return [Array<Types::ClusterEventSummary>]
29923
+ #
29924
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ListClusterEventsResponse AWS API Documentation
29925
+ #
29926
+ class ListClusterEventsResponse < Struct.new(
29927
+ :next_token,
29928
+ :events)
29929
+ SENSITIVE = []
29930
+ include Aws::Structure
29931
+ end
29932
+
29075
29933
  # @!attribute [rw] cluster_name
29076
29934
  # The string name or the Amazon Resource Name (ARN) of the SageMaker
29077
29935
  # HyperPod cluster in which you want to retrieve the list of nodes.
@@ -29140,6 +29998,13 @@ module Aws::SageMaker
29140
29998
  # The sort order for results. The default value is `Ascending`.
29141
29999
  # @return [String]
29142
30000
  #
30001
+ # @!attribute [rw] include_node_logical_ids
30002
+ # Specifies whether to include nodes that are still being provisioned
30003
+ # in the response. When set to `true`, the response includes all nodes
30003
+ # regardless of their provisioning status. When set to `false`
30005
+ # (default), only nodes with assigned `InstanceIds` are returned.
30006
+ # @return [Boolean]
30007
+ #
29143
30008
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ListClusterNodesRequest AWS API Documentation
29144
30009
  #
29145
30010
  class ListClusterNodesRequest < Struct.new(
@@ -29150,7 +30015,8 @@ module Aws::SageMaker
29150
30015
  :max_results,
29151
30016
  :next_token,
29152
30017
  :sort_by,
29153
- :sort_order)
30018
+ :sort_order,
30019
+ :include_node_logical_ids)
29154
30020
  SENSITIVE = []
29155
30021
  include Aws::Structure
29156
30022
  end
@@ -34283,6 +35149,49 @@ module Aws::SageMaker
34283
35149
  include Aws::Structure
34284
35150
  end
34285
35151
 
35152
+ # @!attribute [rw] reserved_capacity_arn
35153
+ # The ARN of the reserved capacity to list UltraServers for.
35154
+ # @return [String]
35155
+ #
35156
+ # @!attribute [rw] max_results
35157
+ # The maximum number of UltraServers to return in the response. The
35158
+ # default value is 10.
35159
+ # @return [Integer]
35160
+ #
35161
+ # @!attribute [rw] next_token
35162
+ # If the previous response was truncated, you receive this token. Use
35163
+ # it in your next request to receive the next set of results.
35164
+ # @return [String]
35165
+ #
35166
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ListUltraServersByReservedCapacityRequest AWS API Documentation
35167
+ #
35168
+ class ListUltraServersByReservedCapacityRequest < Struct.new(
35169
+ :reserved_capacity_arn,
35170
+ :max_results,
35171
+ :next_token)
35172
+ SENSITIVE = []
35173
+ include Aws::Structure
35174
+ end
35175
+
35176
+ # @!attribute [rw] next_token
35177
+ # If the response is truncated, SageMaker returns this token. Use it
35178
+ # in the next request to retrieve the next set of UltraServers.
35179
+ # @return [String]
35180
+ #
35181
+ # @!attribute [rw] ultra_servers
35182
+ # A list of UltraServers that are part of the specified reserved
35183
+ # capacity.
35184
+ # @return [Array<Types::UltraServer>]
35185
+ #
35186
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ListUltraServersByReservedCapacityResponse AWS API Documentation
35187
+ #
35188
+ class ListUltraServersByReservedCapacityResponse < Struct.new(
35189
+ :next_token,
35190
+ :ultra_servers)
35191
+ SENSITIVE = []
35192
+ include Aws::Structure
35193
+ end
35194
+
34286
35195
  # @!attribute [rw] next_token
34287
35196
  # If the previous response was truncated, you will receive this token.
34288
35197
  # Use it in your next request to receive the next set of results.
@@ -37695,6 +38604,33 @@ module Aws::SageMaker
37695
38604
  include Aws::Structure
37696
38605
  end
37697
38606
 
38607
+ # Information about a node that was successfully added to the cluster.
38608
+ #
38609
+ # @!attribute [rw] node_logical_id
38610
+ # A unique identifier assigned to the node that can be used to track
38611
+ # its provisioning status through the `DescribeClusterNode` operation.
38612
+ # @return [String]
38613
+ #
38614
+ # @!attribute [rw] instance_group_name
38615
+ # The name of the instance group to which the node was added.
38616
+ # @return [String]
38617
+ #
38618
+ # @!attribute [rw] status
38619
+ # The current status of the node. Possible values include `Pending`,
38620
+ # `Running`, `Failed`, `ShuttingDown`, `SystemUpdating`,
38621
+ # `DeepHealthCheckInProgress`, and `NotFound`.
38622
+ # @return [String]
38623
+ #
38624
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/NodeAdditionResult AWS API Documentation
38625
+ #
38626
+ class NodeAdditionResult < Struct.new(
38627
+ :node_logical_id,
38628
+ :instance_group_name,
38629
+ :status)
38630
+ SENSITIVE = []
38631
+ include Aws::Structure
38632
+ end
38633
+
37698
38634
  # Provides a summary of a notebook instance lifecycle configuration.
37699
38635
  #
37700
38636
  # @!attribute [rw] notebook_instance_lifecycle_config_name
@@ -39813,6 +40749,27 @@ module Aws::SageMaker
39813
40749
  include Aws::Structure
39814
40750
  end
39815
40751
 
40752
+ # Specifies how instances should be placed on a specific UltraServer.
40753
+ #
40754
+ # @!attribute [rw] ultra_server_id
40755
+ # The unique identifier of the UltraServer where instances should be
40756
+ # placed.
40757
+ # @return [String]
40758
+ #
40759
+ # @!attribute [rw] instance_count
40760
+ # The number of ML compute instances required to be placed together on
40761
+ # the same UltraServer. The minimum value is 1.
40762
+ # @return [Integer]
40763
+ #
40764
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/PlacementSpecification AWS API Documentation
40765
+ #
40766
+ class PlacementSpecification < Struct.new(
40767
+ :ultra_server_id,
40768
+ :instance_count)
40769
+ SENSITIVE = []
40770
+ include Aws::Structure
40771
+ end
40772
+
39816
40773
  # A specification for a predefined metric.
39817
40774
  #
39818
40775
  # @!attribute [rw] predefined_metric_type
@@ -42691,6 +43648,20 @@ module Aws::SageMaker
42691
43648
  # SageMaker HyperPod clusters using Amazon SageMaker Training Plan, see
42692
43649
  # `CreateTrainingPlan`.
42693
43650
  #
43651
+ # @!attribute [rw] reserved_capacity_type
43652
+ # The type of reserved capacity offering.
43653
+ # @return [String]
43654
+ #
43655
+ # @!attribute [rw] ultra_server_type
43656
+ # The type of UltraServer included in this reserved capacity offering,
43657
+ # such as ml.u-p6e-gb200x72.
43658
+ # @return [String]
43659
+ #
43660
+ # @!attribute [rw] ultra_server_count
43661
+ # The number of UltraServers included in this reserved capacity
43662
+ # offering.
43663
+ # @return [Integer]
43664
+ #
42694
43665
  # @!attribute [rw] instance_type
42695
43666
  # The instance type for the reserved capacity offering.
42696
43667
  # @return [String]
@@ -42724,6 +43695,9 @@ module Aws::SageMaker
42724
43695
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ReservedCapacityOffering AWS API Documentation
42725
43696
  #
42726
43697
  class ReservedCapacityOffering < Struct.new(
43698
+ :reserved_capacity_type,
43699
+ :ultra_server_type,
43700
+ :ultra_server_count,
42727
43701
  :instance_type,
42728
43702
  :instance_count,
42729
43703
  :availability_zone,
@@ -42745,6 +43719,19 @@ module Aws::SageMaker
42745
43719
  # The Amazon Resource Name (ARN) of the reserved capacity.
42746
43720
  # @return [String]
42747
43721
  #
43722
+ # @!attribute [rw] reserved_capacity_type
43723
+ # The type of reserved capacity.
43724
+ # @return [String]
43725
+ #
43726
+ # @!attribute [rw] ultra_server_type
43727
+ # The type of UltraServer included in this reserved capacity, such as
43728
+ # ml.u-p6e-gb200x72.
43729
+ # @return [String]
43730
+ #
43731
+ # @!attribute [rw] ultra_server_count
43732
+ # The number of UltraServers included in this reserved capacity.
43733
+ # @return [Integer]
43734
+ #
42748
43735
  # @!attribute [rw] instance_type
42749
43736
  # The instance type for the reserved capacity.
42750
43737
  # @return [String]
@@ -42783,6 +43770,9 @@ module Aws::SageMaker
42783
43770
  #
42784
43771
  class ReservedCapacitySummary < Struct.new(
42785
43772
  :reserved_capacity_arn,
43773
+ :reserved_capacity_type,
43774
+ :ultra_server_type,
43775
+ :ultra_server_count,
42786
43776
  :instance_type,
42787
43777
  :total_instance_count,
42788
43778
  :status,
@@ -42955,6 +43945,12 @@ module Aws::SageMaker
42955
43945
  # resource configuration.
42956
43946
  # @return [String]
42957
43947
  #
43948
+ # @!attribute [rw] instance_placement_config
43949
+ # Configuration for how training job instances are placed and
43950
+ # allocated within UltraServers. Only applicable for UltraServer
43951
+ # capacity.
43952
+ # @return [Types::InstancePlacementConfig]
43953
+ #
42958
43954
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ResourceConfig AWS API Documentation
42959
43955
  #
42960
43956
  class ResourceConfig < Struct.new(
@@ -42964,7 +43960,8 @@ module Aws::SageMaker
42964
43960
  :volume_kms_key_id,
42965
43961
  :keep_alive_period_in_seconds,
42966
43962
  :instance_groups,
42967
- :training_plan_arn)
43963
+ :training_plan_arn,
43964
+ :instance_placement_config)
42968
43965
  SENSITIVE = []
42969
43966
  include Aws::Structure
42970
43967
  end
@@ -44140,6 +45137,14 @@ module Aws::SageMaker
44140
45137
  # match your requirements.
44141
45138
  # @return [Integer]
44142
45139
  #
45140
+ # @!attribute [rw] ultra_server_type
45141
+ # The type of UltraServer to search for, such as ml.u-p6e-gb200x72.
45142
+ # @return [String]
45143
+ #
45144
+ # @!attribute [rw] ultra_server_count
45145
+ # The number of UltraServers to search for.
45146
+ # @return [Integer]
45147
+ #
44143
45148
  # @!attribute [rw] start_time_after
44144
45149
  # A filter to search for training plan offerings with a start time
44145
45150
  # after a specified date.
@@ -44172,6 +45177,8 @@ module Aws::SageMaker
44172
45177
  class SearchTrainingPlanOfferingsRequest < Struct.new(
44173
45178
  :instance_type,
44174
45179
  :instance_count,
45180
+ :ultra_server_type,
45181
+ :ultra_server_count,
44175
45182
  :start_time_after,
44176
45183
  :end_time_before,
44177
45184
  :duration_hours,
@@ -47523,6 +48530,10 @@ module Aws::SageMaker
47523
48530
  # The number of instances currently in use from this training plan.
47524
48531
  # @return [Integer]
47525
48532
  #
48533
+ # @!attribute [rw] total_ultra_server_count
48534
+ # The total number of UltraServers allocated to this training plan.
48535
+ # @return [Integer]
48536
+ #
47526
48537
  # @!attribute [rw] target_resources
47527
48538
  # The target resources (e.g., training jobs, HyperPod clusters) that
47528
48539
  # can use this training plan.
@@ -47558,6 +48569,7 @@ module Aws::SageMaker
47558
48569
  :total_instance_count,
47559
48570
  :available_instance_count,
47560
48571
  :in_use_instance_count,
48572
+ :total_ultra_server_count,
47561
48573
  :target_resources,
47562
48574
  :reserved_capacity_summaries)
47563
48575
  SENSITIVE = []
@@ -49072,6 +50084,125 @@ module Aws::SageMaker
49072
50084
  include Aws::Structure
49073
50085
  end
49074
50086
 
50087
+ # Represents a high-performance compute server used for distributed
50088
+ # training in SageMaker AI. An UltraServer consists of multiple
50089
+ # instances within a shared NVLink interconnect domain.
50090
+ #
50091
+ # @!attribute [rw] ultra_server_id
50092
+ # The unique identifier for the UltraServer.
50093
+ # @return [String]
50094
+ #
50095
+ # @!attribute [rw] ultra_server_type
50096
+ # The type of UltraServer, such as ml.u-p6e-gb200x72.
50097
+ # @return [String]
50098
+ #
50099
+ # @!attribute [rw] availability_zone
50100
+ # The name of the Availability Zone where the UltraServer is
50101
+ # provisioned.
50102
+ # @return [String]
50103
+ #
50104
+ # @!attribute [rw] instance_type
50105
+ # The Amazon EC2 instance type used in the UltraServer.
50106
+ # @return [String]
50107
+ #
50108
+ # @!attribute [rw] total_instance_count
50109
+ # The total number of instances in this UltraServer.
50110
+ # @return [Integer]
50111
+ #
50112
+ # @!attribute [rw] configured_spare_instance_count
50113
+ # The number of spare instances configured for this UltraServer to
50114
+ # provide enhanced resiliency.
50115
+ # @return [Integer]
50116
+ #
50117
+ # @!attribute [rw] available_instance_count
50118
+ # The number of instances currently available for use in this
50119
+ # UltraServer.
50120
+ # @return [Integer]
50121
+ #
50122
+ # @!attribute [rw] in_use_instance_count
50123
+ # The number of instances currently in use in this UltraServer.
50124
+ # @return [Integer]
50125
+ #
50126
+ # @!attribute [rw] available_spare_instance_count
50127
+ # The number of available spare instances in the UltraServer.
50128
+ # @return [Integer]
50129
+ #
50130
+ # @!attribute [rw] unhealthy_instance_count
50131
+ # The number of instances in this UltraServer that are currently in an
50132
+ # unhealthy state.
50133
+ # @return [Integer]
50134
+ #
50135
+ # @!attribute [rw] health_status
50136
+ # The overall health status of the UltraServer.
50137
+ # @return [String]
50138
+ #
50139
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/UltraServer AWS API Documentation
50140
+ #
50141
+ class UltraServer < Struct.new(
50142
+ :ultra_server_id,
50143
+ :ultra_server_type,
50144
+ :availability_zone,
50145
+ :instance_type,
50146
+ :total_instance_count,
50147
+ :configured_spare_instance_count,
50148
+ :available_instance_count,
50149
+ :in_use_instance_count,
50150
+ :available_spare_instance_count,
50151
+ :unhealthy_instance_count,
50152
+ :health_status)
50153
+ SENSITIVE = []
50154
+ include Aws::Structure
50155
+ end
50156
+
50157
+ # Contains information about the UltraServer object.
50158
+ #
50159
+ # @!attribute [rw] id
50160
+ # The unique identifier of the UltraServer.
50161
+ # @return [String]
50162
+ #
50163
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/UltraServerInfo AWS API Documentation
50164
+ #
50165
+ class UltraServerInfo < Struct.new(
50166
+ :id)
50167
+ SENSITIVE = []
50168
+ include Aws::Structure
50169
+ end
50170
+
50171
+ # A summary of UltraServer resources and their current status.
50172
+ #
50173
+ # @!attribute [rw] ultra_server_type
50174
+ # The type of UltraServer, such as ml.u-p6e-gb200x72.
50175
+ # @return [String]
50176
+ #
50177
+ # @!attribute [rw] instance_type
50178
+ # The Amazon EC2 instance type used in the UltraServer.
50179
+ # @return [String]
50180
+ #
50181
+ # @!attribute [rw] ultra_server_count
50182
+ # The number of UltraServers of this type.
50183
+ # @return [Integer]
50184
+ #
50185
+ # @!attribute [rw] available_spare_instance_count
50186
+ # The number of available spare instances in the UltraServers.
50187
+ # @return [Integer]
50188
+ #
50189
+ # @!attribute [rw] unhealthy_instance_count
50190
+ # The total number of instances across all UltraServers of this type
50191
+ # that are currently in an unhealthy state.
50192
+ # @return [Integer]
50193
+ #
50194
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/UltraServerSummary AWS API Documentation
50195
+ #
50196
+ class UltraServerSummary < Struct.new(
50197
+ :ultra_server_type,
50198
+ :instance_type,
50199
+ :ultra_server_count,
50200
+ :available_spare_instance_count,
50201
+ :unhealthy_instance_count)
50202
+ SENSITIVE = []
50203
+ include Aws::Structure
50204
+ end
50205
+
49075
50206
  # The settings that apply to an Amazon SageMaker AI domain when you use
49076
50207
  # it in Amazon SageMaker Unified Studio.
49077
50208
  #
@@ -49393,12 +50524,40 @@ module Aws::SageMaker
49393
50524
  # The configuration to use when updating the AMI versions.
49394
50525
  # @return [Types::DeploymentConfiguration]
49395
50526
  #
50527
+ # @!attribute [rw] image_id
50528
+ # When configuring your HyperPod cluster, you can specify an image ID
50529
+ # using one of the following options:
50530
+ #
50531
+ # * `HyperPodPublicAmiId`: Use a HyperPod public AMI
50532
+ #
50533
+ # * `CustomAmiId`: Use your custom AMI
50534
+ #
50535
+ # * `default`: Use the default latest system image
50536
+ #
50537
+ # If you choose to use a custom AMI (`CustomAmiId`), ensure it meets
50538
+ # the following requirements:
50539
+ #
50540
+ # * Encryption: The custom AMI must be unencrypted.
50541
+ #
50542
+ # * Ownership: The custom AMI must be owned by the same Amazon Web
50543
+ # Services account that is creating the HyperPod cluster.
50544
+ #
50545
+ # * Volume support: Only the primary AMI snapshot volume is supported;
50546
+ # additional AMI volumes are not supported.
50547
+ #
50548
+ # When updating the instance group's AMI through the
50549
+ # `UpdateClusterSoftware` operation, if an instance group uses a
50550
+ # custom AMI, you must provide an `ImageId` or use the default as
50551
+ # input.
50552
+ # @return [String]
50553
+ #
49396
50554
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/UpdateClusterSoftwareRequest AWS API Documentation
49397
50555
  #
49398
50556
  class UpdateClusterSoftwareRequest < Struct.new(
49399
50557
  :cluster_name,
49400
50558
  :instance_groups,
49401
- :deployment_config)
50559
+ :deployment_config,
50560
+ :image_id)
49402
50561
  SENSITIVE = []
49403
50562
  include Aws::Structure
49404
50563
  end