sagemaker-core 1.0.47__py3-none-any.whl → 1.0.62__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -106,7 +106,8 @@ SAGEMAKER_PYTHON_SDK_CONFIG_SCHEMA = {
106
106
  "type": "array",
107
107
  "items": {"type": "string"},
108
108
  },
109
- }
109
+ },
110
+ "cluster_role": {"type": "string"},
110
111
  },
111
112
  },
112
113
  "CompilationJob": {
@@ -978,6 +978,7 @@ class App(Base):
978
978
  user_profile_name: The user profile name.
979
979
  space_name: The name of the space. If this value is not set, then UserProfileName must be set.
980
980
  status: The status.
981
+ effective_trusted_identity_propagation_status: The effective status of Trusted Identity Propagation (TIP) for this application. When enabled, user identities from IAM Identity Center are being propagated through the application to TIP enabled Amazon Web Services services. When disabled, standard IAM role-based access is used.
981
982
  recovery_mode: Indicates whether the application is launched in recovery mode.
982
983
  last_health_check_timestamp: The timestamp of the last health check.
983
984
  last_user_activity_timestamp: The timestamp of the last user's activity. LastUserActivityTimestamp is also updated when SageMaker AI performs health checks without user activity. As a result, this value is set to the same value as LastHealthCheckTimestamp.
@@ -995,6 +996,7 @@ class App(Base):
995
996
  user_profile_name: Optional[str] = Unassigned()
996
997
  space_name: Optional[str] = Unassigned()
997
998
  status: Optional[str] = Unassigned()
999
+ effective_trusted_identity_propagation_status: Optional[str] = Unassigned()
998
1000
  recovery_mode: Optional[bool] = Unassigned()
999
1001
  last_health_check_timestamp: Optional[datetime.datetime] = Unassigned()
1000
1002
  last_user_activity_timestamp: Optional[datetime.datetime] = Unassigned()
@@ -3222,7 +3224,11 @@ class Cluster(Base):
3222
3224
  restricted_instance_groups: The specialized instance groups for training models like Amazon Nova to be created in the SageMaker HyperPod cluster.
3223
3225
  vpc_config:
3224
3226
  orchestrator: The type of orchestrator used for the SageMaker HyperPod cluster.
3227
+ tiered_storage_config: The current configuration for managed tier checkpointing on the HyperPod cluster. For example, this shows whether the feature is enabled and the percentage of cluster memory allocated for checkpoint storage.
3225
3228
  node_recovery: The node recovery mode configured for the SageMaker HyperPod cluster.
3229
+ node_provisioning_mode: The mode used for provisioning nodes in the cluster.
3230
+ cluster_role: The Amazon Resource Name (ARN) of the IAM role that HyperPod uses for cluster autoscaling operations.
3231
+ auto_scaling: The current autoscaling configuration and status for the autoscaler.
3226
3232
 
3227
3233
  """
3228
3234
 
@@ -3237,7 +3243,11 @@ class Cluster(Base):
3237
3243
  )
3238
3244
  vpc_config: Optional[shapes.VpcConfig] = Unassigned()
3239
3245
  orchestrator: Optional[shapes.ClusterOrchestrator] = Unassigned()
3246
+ tiered_storage_config: Optional[shapes.ClusterTieredStorageConfig] = Unassigned()
3240
3247
  node_recovery: Optional[str] = Unassigned()
3248
+ node_provisioning_mode: Optional[str] = Unassigned()
3249
+ cluster_role: Optional[str] = Unassigned()
3250
+ auto_scaling: Optional[shapes.ClusterAutoScalingConfigOutput] = Unassigned()
3241
3251
 
3242
3252
  def get_name(self) -> str:
3243
3253
  attributes = vars(self)
@@ -3262,7 +3272,8 @@ class Cluster(Base):
3262
3272
  "vpc_config": {
3263
3273
  "security_group_ids": {"type": "array", "items": {"type": "string"}},
3264
3274
  "subnets": {"type": "array", "items": {"type": "string"}},
3265
- }
3275
+ },
3276
+ "cluster_role": {"type": "string"},
3266
3277
  }
3267
3278
  return create_func(
3268
3279
  *args,
@@ -3287,6 +3298,10 @@ class Cluster(Base):
3287
3298
  tags: Optional[List[shapes.Tag]] = Unassigned(),
3288
3299
  orchestrator: Optional[shapes.ClusterOrchestrator] = Unassigned(),
3289
3300
  node_recovery: Optional[str] = Unassigned(),
3301
+ tiered_storage_config: Optional[shapes.ClusterTieredStorageConfig] = Unassigned(),
3302
+ node_provisioning_mode: Optional[str] = Unassigned(),
3303
+ cluster_role: Optional[str] = Unassigned(),
3304
+ auto_scaling: Optional[shapes.ClusterAutoScalingConfig] = Unassigned(),
3290
3305
  session: Optional[Session] = None,
3291
3306
  region: Optional[str] = None,
3292
3307
  ) -> Optional["Cluster"]:
@@ -3299,8 +3314,12 @@ class Cluster(Base):
3299
3314
  restricted_instance_groups: The specialized instance groups for training models like Amazon Nova to be created in the SageMaker HyperPod cluster.
3300
3315
  vpc_config: Specifies the Amazon Virtual Private Cloud (VPC) that is associated with the Amazon SageMaker HyperPod cluster. You can control access to and from your resources by configuring your VPC. For more information, see Give SageMaker access to resources in your Amazon VPC. When your Amazon VPC and subnets support IPv6, network communications differ based on the cluster orchestration platform: Slurm-orchestrated clusters automatically configure nodes with dual IPv6 and IPv4 addresses, allowing immediate IPv6 network communications. In Amazon EKS-orchestrated clusters, nodes receive dual-stack addressing, but pods can only use IPv6 when the Amazon EKS cluster is explicitly IPv6-enabled. For information about deploying an IPv6 Amazon EKS cluster, see Amazon EKS IPv6 Cluster Deployment. Additional resources for IPv6 configuration: For information about adding IPv6 support to your VPC, see to IPv6 Support for VPC. For information about creating a new IPv6-compatible VPC, see Amazon VPC Creation Guide. To configure SageMaker HyperPod with a custom Amazon VPC, see Custom Amazon VPC Setup for SageMaker HyperPod.
3301
3316
  tags: Custom tags for managing the SageMaker HyperPod cluster as an Amazon Web Services resource. You can add tags to your cluster in the same way you add them in other Amazon Web Services services that support tagging. To learn more about tagging Amazon Web Services resources in general, see Tagging Amazon Web Services Resources User Guide.
3302
- orchestrator: The type of orchestrator to use for the SageMaker HyperPod cluster. Currently, the only supported value is "eks", which is to use an Amazon Elastic Kubernetes Service (EKS) cluster as the orchestrator.
3317
+ orchestrator: The type of orchestrator to use for the SageMaker HyperPod cluster. Currently, the only supported value is "eks", which is to use an Amazon Elastic Kubernetes Service cluster as the orchestrator.
3303
3318
  node_recovery: The node recovery mode for the SageMaker HyperPod cluster. When set to Automatic, SageMaker HyperPod will automatically reboot or replace faulty nodes when issues are detected. When set to None, cluster administrators will need to manually manage any faulty cluster instances.
3319
+ tiered_storage_config: The configuration for managed tier checkpointing on the HyperPod cluster. When enabled, this feature uses a multi-tier storage approach for storing model checkpoints, providing faster checkpoint operations and improved fault tolerance across cluster nodes.
3320
+ node_provisioning_mode: The mode for provisioning nodes in the cluster. You can specify the following modes: Continuous: Scaling behavior that enables 1) concurrent operation execution within instance groups, 2) continuous retry mechanisms for failed operations, 3) enhanced customer visibility into cluster events through detailed event streams, 4) partial provisioning capabilities. Your clusters and instance groups remain InService while scaling. This mode is only supported for EKS orchestrated clusters.
3321
+ cluster_role: The Amazon Resource Name (ARN) of the IAM role that HyperPod assumes to perform cluster autoscaling operations. This role must have permissions for sagemaker:BatchAddClusterNodes and sagemaker:BatchDeleteClusterNodes. This is only required when autoscaling is enabled and when HyperPod is performing autoscaling operations.
3322
+ auto_scaling: The autoscaling configuration for the cluster. Enables automatic scaling of cluster nodes based on workload demand using a Karpenter-based system.
3304
3323
  session: Boto3 session.
3305
3324
  region: Region name.
3306
3325
 
@@ -3337,6 +3356,10 @@ class Cluster(Base):
3337
3356
  "Tags": tags,
3338
3357
  "Orchestrator": orchestrator,
3339
3358
  "NodeRecovery": node_recovery,
3359
+ "TieredStorageConfig": tiered_storage_config,
3360
+ "NodeProvisioningMode": node_provisioning_mode,
3361
+ "ClusterRole": cluster_role,
3362
+ "AutoScaling": auto_scaling,
3340
3363
  }
3341
3364
 
3342
3365
  operation_input_args = Base.populate_chained_attributes(
@@ -3450,8 +3473,11 @@ class Cluster(Base):
3450
3473
  restricted_instance_groups: Optional[
3451
3474
  List[shapes.ClusterRestrictedInstanceGroupSpecification]
3452
3475
  ] = Unassigned(),
3476
+ tiered_storage_config: Optional[shapes.ClusterTieredStorageConfig] = Unassigned(),
3453
3477
  node_recovery: Optional[str] = Unassigned(),
3454
3478
  instance_groups_to_delete: Optional[List[str]] = Unassigned(),
3479
+ cluster_role: Optional[str] = Unassigned(),
3480
+ auto_scaling: Optional[shapes.ClusterAutoScalingConfig] = Unassigned(),
3455
3481
  ) -> Optional["Cluster"]:
3456
3482
  """
3457
3483
  Update a Cluster resource
@@ -3484,8 +3510,11 @@ class Cluster(Base):
3484
3510
  "ClusterName": self.cluster_name,
3485
3511
  "InstanceGroups": instance_groups,
3486
3512
  "RestrictedInstanceGroups": restricted_instance_groups,
3513
+ "TieredStorageConfig": tiered_storage_config,
3487
3514
  "NodeRecovery": node_recovery,
3488
3515
  "InstanceGroupsToDelete": instance_groups_to_delete,
3516
+ "ClusterRole": cluster_role,
3517
+ "AutoScaling": auto_scaling,
3489
3518
  }
3490
3519
  logger.debug(f"Input request: {operation_input_args}")
3491
3520
  # serialize the input request
@@ -3731,6 +3760,7 @@ class Cluster(Base):
3731
3760
  def get_node(
3732
3761
  self,
3733
3762
  node_id: Optional[str] = Unassigned(),
3763
+ node_logical_id: Optional[str] = Unassigned(),
3734
3764
  session: Optional[Session] = None,
3735
3765
  region: Optional[str] = None,
3736
3766
  ) -> Optional[shapes.ClusterNodeDetails]:
@@ -3739,6 +3769,7 @@ class Cluster(Base):
3739
3769
 
3740
3770
  Parameters:
3741
3771
  node_id: The ID of the SageMaker HyperPod cluster node.
3772
+ node_logical_id: The logical identifier of the node to describe. You can specify either NodeLogicalId or InstanceId, but not both. NodeLogicalId can be used to describe nodes that are still being provisioned and don't yet have an InstanceId assigned.
3742
3773
  session: Boto3 session.
3743
3774
  region: Region name.
3744
3775
 
@@ -3761,6 +3792,7 @@ class Cluster(Base):
3761
3792
  operation_input_args = {
3762
3793
  "ClusterName": self.cluster_name,
3763
3794
  "NodeId": node_id,
3795
+ "NodeLogicalId": node_logical_id,
3764
3796
  }
3765
3797
  # serialize the input request
3766
3798
  operation_input_args = serialize(operation_input_args)
@@ -3785,6 +3817,7 @@ class Cluster(Base):
3785
3817
  instance_group_name_contains: Optional[str] = Unassigned(),
3786
3818
  sort_by: Optional[str] = Unassigned(),
3787
3819
  sort_order: Optional[str] = Unassigned(),
3820
+ include_node_logical_ids: Optional[bool] = Unassigned(),
3788
3821
  session: Optional[Session] = None,
3789
3822
  region: Optional[str] = None,
3790
3823
  ) -> ResourceIterator[shapes.ClusterNodeDetails]:
@@ -3799,6 +3832,7 @@ class Cluster(Base):
3799
3832
  next_token: If the result of the previous ListClusterNodes request was truncated, the response includes a NextToken. To retrieve the next set of cluster nodes, use the token in the next request.
3800
3833
  sort_by: The field by which to sort results. The default value is CREATION_TIME.
3801
3834
  sort_order: The sort order for results. The default value is Ascending.
3835
+ include_node_logical_ids: Specifies whether to include nodes that are still being provisioned in the response. When set to True, the response includes all nodes regardless of their provisioning status. When set to False (default), only nodes with assigned InstanceIds are returned.
3802
3836
  session: Boto3 session.
3803
3837
  region: Region name.
3804
3838
 
@@ -3825,6 +3859,7 @@ class Cluster(Base):
3825
3859
  "InstanceGroupNameContains": instance_group_name_contains,
3826
3860
  "SortBy": sort_by,
3827
3861
  "SortOrder": sort_order,
3862
+ "IncludeNodeLogicalIds": include_node_logical_ids,
3828
3863
  }
3829
3864
  # serialize the input request
3830
3865
  operation_input_args = serialize(operation_input_args)
@@ -3847,6 +3882,7 @@ class Cluster(Base):
3847
3882
  def update_software(
3848
3883
  self,
3849
3884
  deployment_config: Optional[shapes.DeploymentConfiguration] = Unassigned(),
3885
+ image_id: Optional[str] = Unassigned(),
3850
3886
  session: Optional[Session] = None,
3851
3887
  region: Optional[str] = None,
3852
3888
  ) -> None:
@@ -3855,6 +3891,7 @@ class Cluster(Base):
3855
3891
 
3856
3892
  Parameters:
3857
3893
  deployment_config: The configuration to use when updating the AMI versions.
3894
+ image_id: When configuring your HyperPod cluster, you can specify an image ID using one of the following options: HyperPodPublicAmiId: Use a HyperPod public AMI CustomAmiId: Use your custom AMI default: Use the default latest system image If you choose to use a custom AMI (CustomAmiId), ensure it meets the following requirements: Encryption: The custom AMI must be unencrypted. Ownership: The custom AMI must be owned by the same Amazon Web Services account that is creating the HyperPod cluster. Volume support: Only the primary AMI snapshot volume is supported; additional AMI volumes are not supported. When updating the instance group's AMI through the UpdateClusterSoftware operation, if an instance group uses a custom AMI, you must provide an ImageId or use the default as input. Note that if you don't specify an instance group in your UpdateClusterSoftware request, then all of the instance groups are patched with the specified image.
3858
3895
  session: Boto3 session.
3859
3896
  region: Region name.
3860
3897
 
@@ -3876,6 +3913,7 @@ class Cluster(Base):
3876
3913
  "ClusterName": self.cluster_name,
3877
3914
  "InstanceGroups": self.instance_groups,
3878
3915
  "DeploymentConfig": deployment_config,
3916
+ "ImageId": image_id,
3879
3917
  }
3880
3918
  # serialize the input request
3881
3919
  operation_input_args = serialize(operation_input_args)
@@ -3893,6 +3931,7 @@ class Cluster(Base):
3893
3931
  def batch_delete_nodes(
3894
3932
  self,
3895
3933
  node_ids: Optional[List[str]] = Unassigned(),
3934
+ node_logical_ids: Optional[List[str]] = Unassigned(),
3896
3935
  session: Optional[Session] = None,
3897
3936
  region: Optional[str] = None,
3898
3937
  ) -> Optional[shapes.BatchDeleteClusterNodesResponse]:
@@ -3901,6 +3940,7 @@ class Cluster(Base):
3901
3940
 
3902
3941
  Parameters:
3903
3942
  node_ids: A list of node IDs to be deleted from the specified cluster. For SageMaker HyperPod clusters using the Slurm workload manager, you cannot remove instances that are configured as Slurm controller nodes. If you need to delete more than 99 instances, contact Support for assistance.
3943
+ node_logical_ids: A list of NodeLogicalIds identifying the nodes to be deleted. You can specify up to 50 NodeLogicalIds. You must specify either NodeLogicalIds, InstanceIds, or both, with a combined maximum of 50 identifiers.
3904
3944
  session: Boto3 session.
3905
3945
  region: Region name.
3906
3946
 
@@ -3923,6 +3963,7 @@ class Cluster(Base):
3923
3963
  operation_input_args = {
3924
3964
  "ClusterName": self.cluster_name,
3925
3965
  "NodeIds": node_ids,
3966
+ "NodeLogicalIds": node_logical_ids,
3926
3967
  }
3927
3968
  # serialize the input request
3928
3969
  operation_input_args = serialize(operation_input_args)
@@ -9407,20 +9448,20 @@ class Endpoint(Base):
9407
9448
  region: Optional[str] = None,
9408
9449
  ) -> Optional[shapes.InvokeEndpointOutput]:
9409
9450
  """
9410
- After you deploy a model into production using Amazon SageMaker hosting services, your client applications use this API to get inferences from the model hosted at the specified endpoint.
9451
+ After you deploy a model into production using Amazon SageMaker AI hosting services, your client applications use this API to get inferences from the model hosted at the specified endpoint.
9411
9452
 
9412
9453
  Parameters:
9413
- body: Provides input data, in the format specified in the ContentType request header. Amazon SageMaker passes all of the data in the body to the model. For information about the format of the request body, see Common Data Formats-Inference.
9454
+ body: Provides input data, in the format specified in the ContentType request header. Amazon SageMaker AI passes all of the data in the body to the model. For information about the format of the request body, see Common Data Formats-Inference.
9414
9455
  content_type: The MIME type of the input data in the request body.
9415
9456
  accept: The desired MIME type of the inference response from the model container.
9416
- custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker endpoint. The information is an opaque value that is forwarded verbatim. You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker Python SDK.
9457
+ custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker AI endpoint. The information is an opaque value that is forwarded verbatim. You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker AI Python SDK.
9417
9458
  target_model: The model to request for inference when invoking a multi-model endpoint.
9418
9459
  target_variant: Specify the production variant to send the inference request to when invoking an endpoint that is running two or more variants. Note that this parameter overrides the default behavior for the endpoint, which is to distribute the invocation traffic based on the variant weights. For information about how to use variant targeting to perform a/b testing, see Test models in production
9419
9460
  target_container_hostname: If the endpoint hosts multiple containers and is configured to use direct invocation, this parameter specifies the host name of the container to invoke.
9420
9461
  inference_id: If you provide a value, it is added to the captured data when you enable data capture on the endpoint. For information about data capture, see Capture Data.
9421
9462
  enable_explanations: An optional JMESPath expression used to override the EnableExplanations parameter of the ClarifyExplainerConfig API. See the EnableExplanations section in the developer guide for more information.
9422
9463
  inference_component_name: If the endpoint hosts one or more inference components, this parameter specifies the name of inference component to invoke.
9423
- session_id: Creates a stateful session or identifies an existing one. You can do one of the following: Create a stateful session by specifying the value NEW_SESSION. Send your request to an existing stateful session by specifying the ID of that session. With a stateful session, you can send multiple requests to a stateful model. When you create a session with a stateful model, the model must create the session ID and set the expiration time. The model must also provide that information in the response to your request. You can get the ID and timestamp from the NewSessionId response parameter. For any subsequent request where you specify that session ID, SageMaker routes the request to the same instance that supports the session.
9464
+ session_id: Creates a stateful session or identifies an existing one. You can do one of the following: Create a stateful session by specifying the value NEW_SESSION. Send your request to an existing stateful session by specifying the ID of that session. With a stateful session, you can send multiple requests to a stateful model. When you create a session with a stateful model, the model must create the session ID and set the expiration time. The model must also provide that information in the response to your request. You can get the ID and timestamp from the NewSessionId response parameter. For any subsequent request where you specify that session ID, SageMaker AI routes the request to the same instance that supports the session.
9424
9465
  session: Boto3 session.
9425
9466
  region: Region name.
9426
9467
 
@@ -9488,14 +9529,14 @@ class Endpoint(Base):
9488
9529
  region: Optional[str] = None,
9489
9530
  ) -> Optional[shapes.InvokeEndpointAsyncOutput]:
9490
9531
  """
9491
- After you deploy a model into production using Amazon SageMaker hosting services, your client applications use this API to get inferences from the model hosted at the specified endpoint in an asynchronous manner.
9532
+ After you deploy a model into production using Amazon SageMaker AI hosting services, your client applications use this API to get inferences from the model hosted at the specified endpoint in an asynchronous manner.
9492
9533
 
9493
9534
  Parameters:
9494
9535
  input_location: The Amazon S3 URI where the inference request payload is stored.
9495
9536
  content_type: The MIME type of the input data in the request body.
9496
9537
  accept: The desired MIME type of the inference response from the model container.
9497
- custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker endpoint. The information is an opaque value that is forwarded verbatim. You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker Python SDK.
9498
- inference_id: The identifier for the inference request. Amazon SageMaker will generate an identifier for you if none is specified.
9538
+ custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker AI endpoint. The information is an opaque value that is forwarded verbatim. You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker AI Python SDK.
9539
+ inference_id: The identifier for the inference request. Amazon SageMaker AI will generate an identifier for you if none is specified.
9499
9540
  request_ttl_seconds: Maximum age in seconds a request can be in the queue before it is marked as expired. The default is 6 hours, or 21,600 seconds.
9500
9541
  invocation_timeout_seconds: Maximum amount of time in seconds a request can be processed before it is marked as expired. The default is 15 minutes, or 900 seconds.
9501
9542
  session: Boto3 session.
@@ -9563,10 +9604,10 @@ class Endpoint(Base):
9563
9604
  Invokes a model at the specified endpoint to return the inference response as a stream.
9564
9605
 
9565
9606
  Parameters:
9566
- body: Provides input data, in the format specified in the ContentType request header. Amazon SageMaker passes all of the data in the body to the model. For information about the format of the request body, see Common Data Formats-Inference.
9607
+ body: Provides input data, in the format specified in the ContentType request header. Amazon SageMaker AI passes all of the data in the body to the model. For information about the format of the request body, see Common Data Formats-Inference.
9567
9608
  content_type: The MIME type of the input data in the request body.
9568
9609
  accept: The desired MIME type of the inference response from the model container.
9569
- custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker endpoint. The information is an opaque value that is forwarded verbatim. You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker Python SDK.
9610
+ custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker AI endpoint. The information is an opaque value that is forwarded verbatim. You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker AI Python SDK.
9570
9611
  target_variant: Specify the production variant to send the inference request to when invoking an endpoint that is running two or more variants. Note that this parameter overrides the default behavior for the endpoint, which is to distribute the invocation traffic based on the variant weights. For information about how to use variant targeting to perform a/b testing, see Test models in production
9571
9612
  target_container_hostname: If the endpoint hosts multiple containers and is configured to use direct invocation, this parameter specifies the host name of the container to invoke.
9572
9613
  inference_id: An identifier that you assign to your request.
@@ -9591,7 +9632,7 @@ class Endpoint(Base):
9591
9632
  InternalFailure: An internal failure occurred. Try your request again. If the problem persists, contact Amazon Web Services customer support.
9592
9633
  InternalStreamFailure: The stream processing failed because of an unknown error, exception or failure. Try your request again.
9593
9634
  ModelError: Model (owned by the customer in the container) returned 4xx or 5xx error code.
9594
- ModelStreamError: An error occurred while streaming the response body. This error can have the following error codes: ModelInvocationTimeExceeded The model failed to finish sending the response within the timeout period allowed by Amazon SageMaker. StreamBroken The Transmission Control Protocol (TCP) connection between the client and the model was reset or closed.
9635
+ ModelStreamError: An error occurred while streaming the response body. This error can have the following error codes: ModelInvocationTimeExceeded The model failed to finish sending the response within the timeout period allowed by Amazon SageMaker AI. StreamBroken The Transmission Control Protocol (TCP) connection between the client and the model was reset or closed.
9595
9636
  ServiceUnavailable: The service is currently unavailable.
9596
9637
  ValidationError: There was an error validating your request.
9597
9638
  """
@@ -16916,7 +16957,7 @@ class LabelingJob(Base):
16916
16957
 
16917
16958
  Parameters:
16918
16959
  labeling_job_name: The name of the labeling job. This name is used to identify the job in a list of labeling jobs. Labeling job names must be unique within an Amazon Web Services account and region. LabelingJobName is not case sensitive. For example, Example-job and example-job are considered the same labeling job name by Ground Truth.
16919
- label_attribute_name: The attribute name to use for the label in the output manifest file. This is the key for the key/value pair formed with the label that a worker assigns to the object. The LabelAttributeName must meet the following requirements. The name can't end with "-metadata". If you are using one of the following built-in task types, the attribute name must end with "-ref". If the task type you are using is not listed below, the attribute name must not end with "-ref". Image semantic segmentation (SemanticSegmentation), and adjustment (AdjustmentSemanticSegmentation) and verification (VerificationSemanticSegmentation) labeling jobs for this task type. Video frame object detection (VideoObjectDetection), and adjustment and verification (AdjustmentVideoObjectDetection) labeling jobs for this task type. Video frame object tracking (VideoObjectTracking), and adjustment and verification (AdjustmentVideoObjectTracking) labeling jobs for this task type. 3D point cloud semantic segmentation (3DPointCloudSemanticSegmentation), and adjustment and verification (Adjustment3DPointCloudSemanticSegmentation) labeling jobs for this task type. 3D point cloud object tracking (3DPointCloudObjectTracking), and adjustment and verification (Adjustment3DPointCloudObjectTracking) labeling jobs for this task type. If you are creating an adjustment or verification labeling job, you must use a different LabelAttributeName than the one used in the original labeling job. The original labeling job is the Ground Truth labeling job that produced the labels that you want verified or adjusted. To learn more about adjustment and verification labeling jobs, see Verify and Adjust Labels.
16960
+ label_attribute_name: The attribute name to use for the label in the output manifest file. This is the key for the key/value pair formed with the label that a worker assigns to the object. The LabelAttributeName must meet the following requirements. The name can't end with "-metadata". If you are using one of the built-in task types or one of the following, the attribute name must end with "-ref". Image semantic segmentation (SemanticSegmentation) and adjustment (AdjustmentSemanticSegmentation) labeling jobs for this task type. One exception is that verification (VerificationSemanticSegmentation) must not end with -"ref". Video frame object detection (VideoObjectDetection), and adjustment and verification (AdjustmentVideoObjectDetection) labeling jobs for this task type. Video frame object tracking (VideoObjectTracking), and adjustment and verification (AdjustmentVideoObjectTracking) labeling jobs for this task type. 3D point cloud semantic segmentation (3DPointCloudSemanticSegmentation), and adjustment and verification (Adjustment3DPointCloudSemanticSegmentation) labeling jobs for this task type. 3D point cloud object tracking (3DPointCloudObjectTracking), and adjustment and verification (Adjustment3DPointCloudObjectTracking) labeling jobs for this task type. If you are creating an adjustment or verification labeling job, you must use a different LabelAttributeName than the one used in the original labeling job. The original labeling job is the Ground Truth labeling job that produced the labels that you want verified or adjusted. To learn more about adjustment and verification labeling jobs, see Verify and Adjust Labels.
16920
16961
  input_config: Input data for the labeling job, such as the Amazon S3 location of the data objects and the location of the manifest file that describes the data objects. You must specify at least one of the following: S3DataSource or SnsDataSource. Use SnsDataSource to specify an SNS input topic for a streaming labeling job. If you do not specify and SNS input topic ARN, Ground Truth will create a one-time labeling job that stops after all data objects in the input manifest file have been labeled. Use S3DataSource to specify an input manifest file for both streaming and one-time labeling jobs. Adding an S3DataSource is optional if you use SnsDataSource to create a streaming labeling job. If you use the Amazon Mechanical Turk workforce, your input data should not include confidential information, personal information or protected health information. Use ContentClassifiers to specify that your data is free of personally identifiable information and adult content.
16921
16962
  output_config: The location of the output data and the Amazon Web Services Key Management Service key ID for the key used to encrypt the output data, if any.
16922
16963
  role_arn: The Amazon Resource Number (ARN) that Amazon SageMaker assumes to perform tasks on your behalf during data labeling. You must grant this role the necessary permissions so that Amazon SageMaker can successfully complete data labeling.
@@ -22510,6 +22551,7 @@ class NotebookInstance(Base):
22510
22551
  failure_reason: If status is Failed, the reason it failed.
22511
22552
  url: The URL that you use to connect to the Jupyter notebook that is running in your notebook instance.
22512
22553
  instance_type: The type of ML compute instance running on the notebook instance.
22554
+ ip_address_type: The IP address type configured for the notebook instance. Returns ipv4 for IPv4-only connectivity or dualstack for both IPv4 and IPv6 connectivity.
22513
22555
  subnet_id: The ID of the VPC subnet.
22514
22556
  security_groups: The IDs of the VPC security groups.
22515
22557
  role_arn: The Amazon Resource Name (ARN) of the IAM role associated with the instance.
@@ -22535,6 +22577,7 @@ class NotebookInstance(Base):
22535
22577
  failure_reason: Optional[str] = Unassigned()
22536
22578
  url: Optional[str] = Unassigned()
22537
22579
  instance_type: Optional[str] = Unassigned()
22580
+ ip_address_type: Optional[str] = Unassigned()
22538
22581
  subnet_id: Optional[str] = Unassigned()
22539
22582
  security_groups: Optional[List[str]] = Unassigned()
22540
22583
  role_arn: Optional[str] = Unassigned()
@@ -22598,6 +22641,7 @@ class NotebookInstance(Base):
22598
22641
  role_arn: str,
22599
22642
  subnet_id: Optional[str] = Unassigned(),
22600
22643
  security_group_ids: Optional[List[str]] = Unassigned(),
22644
+ ip_address_type: Optional[str] = Unassigned(),
22601
22645
  kms_key_id: Optional[str] = Unassigned(),
22602
22646
  tags: Optional[List[shapes.Tag]] = Unassigned(),
22603
22647
  lifecycle_config_name: Optional[str] = Unassigned(),
@@ -22623,6 +22667,7 @@ class NotebookInstance(Base):
22623
22667
  role_arn: When you send any requests to Amazon Web Services resources from the notebook instance, SageMaker AI assumes this role to perform tasks on your behalf. You must grant this role necessary permissions so SageMaker AI can perform these tasks. The policy must allow the SageMaker AI service principal (sagemaker.amazonaws.com) permissions to assume this role. For more information, see SageMaker AI Roles. To be able to pass this role to SageMaker AI, the caller of this API must have the iam:PassRole permission.
22624
22668
  subnet_id: The ID of the subnet in a VPC to which you would like to have a connectivity from your ML compute instance.
22625
22669
  security_group_ids: The VPC security group IDs, in the form sg-xxxxxxxx. The security groups must be for the same VPC as specified in the subnet.
22670
+ ip_address_type: The IP address type for the notebook instance. Specify ipv4 for IPv4-only connectivity or dualstack for both IPv4 and IPv6 connectivity. When you specify dualstack, the subnet must support IPv6 CIDR blocks. If not specified, defaults to ipv4.
22626
22671
  kms_key_id: The Amazon Resource Name (ARN) of a Amazon Web Services Key Management Service key that SageMaker AI uses to encrypt data on the storage volume attached to your notebook instance. The KMS key you provide must be enabled. For information, see Enabling and Disabling Keys in the Amazon Web Services Key Management Service Developer Guide.
22627
22672
  tags: An array of key-value pairs. You can use tags to categorize your Amazon Web Services resources in different ways, for example, by purpose, owner, or environment. For more information, see Tagging Amazon Web Services Resources.
22628
22673
  lifecycle_config_name: The name of a lifecycle configuration to associate with the notebook instance. For information about lifestyle configurations, see Step 2.1: (Optional) Customize a Notebook Instance.
@@ -22632,7 +22677,7 @@ class NotebookInstance(Base):
22632
22677
  default_code_repository: A Git repository to associate with the notebook instance as its default code repository. This can be either the name of a Git repository stored as a resource in your account, or the URL of a Git repository in Amazon Web Services CodeCommit or in any other Git repository. When you open a notebook instance, it opens in the directory that contains this repository. For more information, see Associating Git Repositories with SageMaker AI Notebook Instances.
22633
22678
  additional_code_repositories: An array of up to three Git repositories to associate with the notebook instance. These can be either the names of Git repositories stored as resources in your account, or the URL of Git repositories in Amazon Web Services CodeCommit or in any other Git repository. These repositories are cloned at the same level as the default repository of your notebook instance. For more information, see Associating Git Repositories with SageMaker AI Notebook Instances.
22634
22679
  root_access: Whether root access is enabled or disabled for users of the notebook instance. The default value is Enabled. Lifecycle configurations need root access to be able to set up a notebook instance. Because of this, lifecycle configurations associated with a notebook instance always run with root access even if you disable root access for users.
22635
- platform_identifier: The platform identifier of the notebook instance runtime environment.
22680
+ platform_identifier: The platform identifier of the notebook instance runtime environment. The default value is notebook-al2-v2.
22636
22681
  instance_metadata_service_configuration: Information on the IMDS configuration of the notebook instance
22637
22682
  session: Boto3 session.
22638
22683
  region: Region name.
@@ -22666,6 +22711,7 @@ class NotebookInstance(Base):
22666
22711
  "InstanceType": instance_type,
22667
22712
  "SubnetId": subnet_id,
22668
22713
  "SecurityGroupIds": security_group_ids,
22714
+ "IpAddressType": ip_address_type,
22669
22715
  "RoleArn": role_arn,
22670
22716
  "KmsKeyId": kms_key_id,
22671
22717
  "Tags": tags,
@@ -22788,6 +22834,8 @@ class NotebookInstance(Base):
22788
22834
  def update(
22789
22835
  self,
22790
22836
  instance_type: Optional[str] = Unassigned(),
22837
+ ip_address_type: Optional[str] = Unassigned(),
22838
+ platform_identifier: Optional[str] = Unassigned(),
22791
22839
  role_arn: Optional[str] = Unassigned(),
22792
22840
  lifecycle_config_name: Optional[str] = Unassigned(),
22793
22841
  disassociate_lifecycle_config: Optional[bool] = Unassigned(),
@@ -22835,6 +22883,8 @@ class NotebookInstance(Base):
22835
22883
  operation_input_args = {
22836
22884
  "NotebookInstanceName": self.notebook_instance_name,
22837
22885
  "InstanceType": instance_type,
22886
+ "IpAddressType": ip_address_type,
22887
+ "PlatformIdentifier": platform_identifier,
22838
22888
  "RoleArn": role_arn,
22839
22889
  "LifecycleConfigName": lifecycle_config_name,
22840
22890
  "DisassociateLifecycleConfig": disassociate_lifecycle_config,
@@ -23949,7 +23999,7 @@ class PartnerApp(Base):
23949
23999
  arn: The ARN of the SageMaker Partner AI App that was described.
23950
24000
  name: The name of the SageMaker Partner AI App.
23951
24001
  type: The type of SageMaker Partner AI App. Must be one of the following: lakera-guard, comet, deepchecks-llm-evaluation, or fiddler.
23952
- status: The status of the SageMaker Partner AI App.
24002
+ status: The status of the SageMaker Partner AI App. Creating: SageMaker AI is creating the partner AI app. The partner AI app is not available during creation. Updating: SageMaker AI is updating the partner AI app. The partner AI app is not available when updating. Deleting: SageMaker AI is deleting the partner AI app. The partner AI app is not available during deletion. Available: The partner AI app is provisioned and accessible. Failed: The partner AI app is in a failed state and isn't available. SageMaker AI is investigating the issue. For further guidance, contact Amazon Web Services Support. UpdateFailed: The partner AI app couldn't be updated but is available. Deleted: The partner AI app is permanently deleted and not available.
23953
24003
  creation_time: The time that the SageMaker Partner AI App was created.
23954
24004
  last_modified_time: The time that the SageMaker Partner AI App was last modified.
23955
24005
  execution_role_arn: The ARN of the IAM role associated with the SageMaker Partner AI App.
@@ -26260,6 +26310,40 @@ class ProcessingJob(Base):
26260
26310
  transform(response, "DescribeProcessingJobResponse", self)
26261
26311
  return self
26262
26312
 
26313
+ @Base.add_validate_call
26314
+ def delete(
26315
+ self,
26316
+ ) -> None:
26317
+ """
26318
+ Delete a ProcessingJob resource
26319
+
26320
+ Raises:
26321
+ botocore.exceptions.ClientError: This exception is raised for AWS service related errors.
26322
+ The error message and error code can be parsed from the exception as follows:
26323
+ ```
26324
+ try:
26325
+ # AWS service call here
26326
+ except botocore.exceptions.ClientError as e:
26327
+ error_message = e.response['Error']['Message']
26328
+ error_code = e.response['Error']['Code']
26329
+ ```
26330
+ ResourceInUse: Resource being accessed is in use.
26331
+ ResourceNotFound: Resource being access is not found.
26332
+ """
26333
+
26334
+ client = Base.get_sagemaker_client()
26335
+
26336
+ operation_input_args = {
26337
+ "ProcessingJobName": self.processing_job_name,
26338
+ }
26339
+ # serialize the input request
26340
+ operation_input_args = serialize(operation_input_args)
26341
+ logger.debug(f"Serialized input request: {operation_input_args}")
26342
+
26343
+ client.delete_processing_job(**operation_input_args)
26344
+
26345
+ logger.info(f"Deleting {self.__class__.__name__} - {self.get_name()}")
26346
+
26263
26347
  @Base.add_validate_call
26264
26348
  def stop(self) -> None:
26265
26349
  """
@@ -28660,6 +28744,40 @@ class TrainingJob(Base):
28660
28744
 
28661
28745
  return self
28662
28746
 
28747
+ @Base.add_validate_call
28748
+ def delete(
28749
+ self,
28750
+ ) -> None:
28751
+ """
28752
+ Delete a TrainingJob resource
28753
+
28754
+ Raises:
28755
+ botocore.exceptions.ClientError: This exception is raised for AWS service related errors.
28756
+ The error message and error code can be parsed from the exception as follows:
28757
+ ```
28758
+ try:
28759
+ # AWS service call here
28760
+ except botocore.exceptions.ClientError as e:
28761
+ error_message = e.response['Error']['Message']
28762
+ error_code = e.response['Error']['Code']
28763
+ ```
28764
+ ResourceInUse: Resource being accessed is in use.
28765
+ ResourceNotFound: Resource being access is not found.
28766
+ """
28767
+
28768
+ client = Base.get_sagemaker_client()
28769
+
28770
+ operation_input_args = {
28771
+ "TrainingJobName": self.training_job_name,
28772
+ }
28773
+ # serialize the input request
28774
+ operation_input_args = serialize(operation_input_args)
28775
+ logger.debug(f"Serialized input request: {operation_input_args}")
28776
+
28777
+ client.delete_training_job(**operation_input_args)
28778
+
28779
+ logger.info(f"Deleting {self.__class__.__name__} - {self.get_name()}")
28780
+
28663
28781
  @Base.add_validate_call
28664
28782
  def stop(self) -> None:
28665
28783
  """
@@ -28774,6 +28892,69 @@ class TrainingJob(Base):
28774
28892
  raise TimeoutExceededError(resouce_type="TrainingJob", status=current_status)
28775
28893
  time.sleep(poll)
28776
28894
 
28895
+ @Base.add_validate_call
28896
+ def wait_for_delete(
28897
+ self,
28898
+ poll: int = 5,
28899
+ timeout: Optional[int] = None,
28900
+ ) -> None:
28901
+ """
28902
+ Wait for a TrainingJob resource to be deleted.
28903
+
28904
+ Parameters:
28905
+ poll: The number of seconds to wait between each poll.
28906
+ timeout: The maximum number of seconds to wait before timing out.
28907
+
28908
+ Raises:
28909
+ botocore.exceptions.ClientError: This exception is raised for AWS service related errors.
28910
+ The error message and error code can be parsed from the exception as follows:
28911
+ ```
28912
+ try:
28913
+ # AWS service call here
28914
+ except botocore.exceptions.ClientError as e:
28915
+ error_message = e.response['Error']['Message']
28916
+ error_code = e.response['Error']['Code']
28917
+ ```
28918
+ TimeoutExceededError: If the resource does not reach a terminal state before the timeout.
28919
+ DeleteFailedStatusError: If the resource reaches a failed state.
28920
+ WaiterError: Raised when an error occurs while waiting.
28921
+ """
28922
+ start_time = time.time()
28923
+
28924
+ progress = Progress(
28925
+ SpinnerColumn("bouncingBar"),
28926
+ TextColumn("{task.description}"),
28927
+ TimeElapsedColumn(),
28928
+ )
28929
+ progress.add_task("Waiting for TrainingJob to be deleted...")
28930
+ status = Status("Current status:")
28931
+
28932
+ with Live(
28933
+ Panel(
28934
+ Group(progress, status),
28935
+ title="Wait Log Panel",
28936
+ border_style=Style(color=Color.BLUE.value),
28937
+ )
28938
+ ):
28939
+ while True:
28940
+ try:
28941
+ self.refresh()
28942
+ current_status = self.training_job_status
28943
+ status.update(f"Current status: [bold]{current_status}")
28944
+
28945
+ if timeout is not None and time.time() - start_time >= timeout:
28946
+ raise TimeoutExceededError(
28947
+ resouce_type="TrainingJob", status=current_status
28948
+ )
28949
+ except botocore.exceptions.ClientError as e:
28950
+ error_code = e.response["Error"]["Code"]
28951
+
28952
+ if "ResourceNotFound" in error_code or "ValidationException" in error_code:
28953
+ logger.info("Resource was not found. It may have been deleted.")
28954
+ return
28955
+ raise e
28956
+ time.sleep(poll)
28957
+
28777
28958
  @classmethod
28778
28959
  @Base.add_validate_call
28779
28960
  def get_all(
@@ -28874,6 +29055,9 @@ class TrainingPlan(Base):
28874
29055
  total_instance_count: The total number of instances reserved in this training plan.
28875
29056
  available_instance_count: The number of instances currently available for use in this training plan.
28876
29057
  in_use_instance_count: The number of instances currently in use from this training plan.
29058
+ unhealthy_instance_count: The number of instances in the training plan that are currently in an unhealthy state.
29059
+ available_spare_instance_count: The number of available spare instances in the training plan.
29060
+ total_ultra_server_count: The total number of UltraServers reserved to this training plan.
28877
29061
  target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group.
28878
29062
  reserved_capacity_summaries: The list of Reserved Capacity providing the underlying compute resources of the plan.
28879
29063
 
@@ -28892,6 +29076,9 @@ class TrainingPlan(Base):
28892
29076
  total_instance_count: Optional[int] = Unassigned()
28893
29077
  available_instance_count: Optional[int] = Unassigned()
28894
29078
  in_use_instance_count: Optional[int] = Unassigned()
29079
+ unhealthy_instance_count: Optional[int] = Unassigned()
29080
+ available_spare_instance_count: Optional[int] = Unassigned()
29081
+ total_ultra_server_count: Optional[int] = Unassigned()
28895
29082
  target_resources: Optional[List[str]] = Unassigned()
28896
29083
  reserved_capacity_summaries: Optional[List[shapes.ReservedCapacitySummary]] = Unassigned()
28897
29084
 
@@ -28917,6 +29104,7 @@ class TrainingPlan(Base):
28917
29104
  cls,
28918
29105
  training_plan_name: str,
28919
29106
  training_plan_offering_id: str,
29107
+ spare_instance_count_per_ultra_server: Optional[int] = Unassigned(),
28920
29108
  tags: Optional[List[shapes.Tag]] = Unassigned(),
28921
29109
  session: Optional[Session] = None,
28922
29110
  region: Optional[str] = None,
@@ -28927,6 +29115,7 @@ class TrainingPlan(Base):
28927
29115
  Parameters:
28928
29116
  training_plan_name: The name of the training plan to create.
28929
29117
  training_plan_offering_id: The unique identifier of the training plan offering to use for creating this plan.
29118
+ spare_instance_count_per_ultra_server: Number of spare instances to reserve per UltraServer for enhanced resiliency. Default is 1.
28930
29119
  tags: An array of key-value pairs to apply to this training plan.
28931
29120
  session: Boto3 session.
28932
29121
  region: Region name.
@@ -28960,6 +29149,7 @@ class TrainingPlan(Base):
28960
29149
  operation_input_args = {
28961
29150
  "TrainingPlanName": training_plan_name,
28962
29151
  "TrainingPlanOfferingId": training_plan_offering_id,
29152
+ "SpareInstanceCountPerUltraServer": spare_instance_count_per_ultra_server,
28963
29153
  "Tags": tags,
28964
29154
  }
28965
29155