sagemaker-core 1.0.49__tar.gz → 1.0.50__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sagemaker-core might be problematic. More details are available on the original diff page.

Files changed (42)
  1. {sagemaker_core-1.0.49/src/sagemaker_core.egg-info → sagemaker_core-1.0.50}/PKG-INFO +1 -1
  2. sagemaker_core-1.0.50/VERSION +1 -0
  3. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/code_injection/shape_dag.py +149 -0
  4. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/resources.py +10 -1
  5. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/shapes.py +129 -12
  6. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50/src/sagemaker_core.egg-info}/PKG-INFO +1 -1
  7. sagemaker_core-1.0.49/VERSION +0 -1
  8. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/LICENSE +0 -0
  9. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/README.rst +0 -0
  10. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/pyproject.toml +0 -0
  11. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/setup.cfg +0 -0
  12. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/__init__.py +0 -0
  13. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/_version.py +0 -0
  14. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/helper/__init__.py +0 -0
  15. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/helper/session_helper.py +0 -0
  16. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/__init__.py +0 -0
  17. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/code_injection/__init__.py +0 -0
  18. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/code_injection/base.py +0 -0
  19. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/code_injection/codec.py +0 -0
  20. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/code_injection/constants.py +0 -0
  21. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/config_schema.py +0 -0
  22. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/default_configs_helper.py +0 -0
  23. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/exceptions.py +0 -0
  24. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/logs.py +0 -0
  25. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/user_agent.py +0 -0
  26. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/main/utils.py +0 -0
  27. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/resources/__init__.py +0 -0
  28. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/shapes/__init__.py +0 -0
  29. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/__init__.py +0 -0
  30. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/codegen.py +0 -0
  31. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/constants.py +0 -0
  32. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/data_extractor.py +0 -0
  33. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/method.py +0 -0
  34. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/resources_codegen.py +0 -0
  35. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/resources_extractor.py +0 -0
  36. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/shapes_codegen.py +0 -0
  37. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/shapes_extractor.py +0 -0
  38. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core/tools/templates.py +0 -0
  39. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core.egg-info/SOURCES.txt +0 -0
  40. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core.egg-info/dependency_links.txt +0 -0
  41. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core.egg-info/requires.txt +0 -0
  42. {sagemaker_core-1.0.49 → sagemaker_core-1.0.50}/src/sagemaker_core.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sagemaker-core
3
- Version: 1.0.49
3
+ Version: 1.0.50
4
4
  Summary: An python package for sagemaker core functionalities
5
5
  Author-email: AWS <sagemaker-interests@amazon.com>
6
6
  Project-URL: Repository, https://github.com/aws/sagemaker-core.git
@@ -0,0 +1 @@
1
+ 1.0.50
@@ -1623,6 +1623,7 @@ SHAPE_DAG = {
1623
1623
  {"name": "Placement", "shape": "ClusterInstancePlacement", "type": "structure"},
1624
1624
  {"name": "CurrentImageId", "shape": "ImageId", "type": "string"},
1625
1625
  {"name": "DesiredImageId", "shape": "ImageId", "type": "string"},
1626
+ {"name": "UltraServerInfo", "shape": "UltraServerInfo", "type": "structure"},
1626
1627
  ],
1627
1628
  "type": "structure",
1628
1629
  },
@@ -1650,6 +1651,7 @@ SHAPE_DAG = {
1650
1651
  "shape": "ClusterInstanceStatusDetails",
1651
1652
  "type": "structure",
1652
1653
  },
1654
+ {"name": "UltraServerInfo", "shape": "UltraServerInfo", "type": "structure"},
1653
1655
  ],
1654
1656
  "type": "structure",
1655
1657
  },
@@ -3438,6 +3440,11 @@ SHAPE_DAG = {
3438
3440
  "members": [
3439
3441
  {"name": "TrainingPlanName", "shape": "TrainingPlanName", "type": "string"},
3440
3442
  {"name": "TrainingPlanOfferingId", "shape": "TrainingPlanOfferingId", "type": "string"},
3443
+ {
3444
+ "name": "SpareInstanceCountPerUltraServer",
3445
+ "shape": "SpareInstanceCountPerUltraServer",
3446
+ "type": "integer",
3447
+ },
3441
3448
  {"name": "Tags", "shape": "TagList", "type": "list"},
3442
3449
  ],
3443
3450
  "type": "structure",
@@ -6112,6 +6119,34 @@ SHAPE_DAG = {
6112
6119
  ],
6113
6120
  "type": "structure",
6114
6121
  },
6122
+ "DescribeReservedCapacityRequest": {
6123
+ "members": [
6124
+ {"name": "ReservedCapacityArn", "shape": "ReservedCapacityArn", "type": "string"}
6125
+ ],
6126
+ "type": "structure",
6127
+ },
6128
+ "DescribeReservedCapacityResponse": {
6129
+ "members": [
6130
+ {"name": "ReservedCapacityArn", "shape": "ReservedCapacityArn", "type": "string"},
6131
+ {"name": "ReservedCapacityType", "shape": "ReservedCapacityType", "type": "string"},
6132
+ {"name": "Status", "shape": "ReservedCapacityStatus", "type": "string"},
6133
+ {"name": "AvailabilityZone", "shape": "AvailabilityZone", "type": "string"},
6134
+ {"name": "DurationHours", "shape": "ReservedCapacityDurationHours", "type": "long"},
6135
+ {"name": "DurationMinutes", "shape": "ReservedCapacityDurationMinutes", "type": "long"},
6136
+ {"name": "StartTime", "shape": "Timestamp", "type": "timestamp"},
6137
+ {"name": "EndTime", "shape": "Timestamp", "type": "timestamp"},
6138
+ {"name": "InstanceType", "shape": "ReservedCapacityInstanceType", "type": "string"},
6139
+ {"name": "TotalInstanceCount", "shape": "TotalInstanceCount", "type": "integer"},
6140
+ {
6141
+ "name": "AvailableInstanceCount",
6142
+ "shape": "AvailableInstanceCount",
6143
+ "type": "integer",
6144
+ },
6145
+ {"name": "InUseInstanceCount", "shape": "InUseInstanceCount", "type": "integer"},
6146
+ {"name": "UltraServerSummary", "shape": "UltraServerSummary", "type": "structure"},
6147
+ ],
6148
+ "type": "structure",
6149
+ },
6115
6150
  "DescribeSpaceRequest": {
6116
6151
  "members": [
6117
6152
  {"name": "DomainId", "shape": "DomainId", "type": "string"},
@@ -6287,6 +6322,17 @@ SHAPE_DAG = {
6287
6322
  "type": "integer",
6288
6323
  },
6289
6324
  {"name": "InUseInstanceCount", "shape": "InUseInstanceCount", "type": "integer"},
6325
+ {
6326
+ "name": "UnhealthyInstanceCount",
6327
+ "shape": "UnhealthyInstanceCount",
6328
+ "type": "integer",
6329
+ },
6330
+ {
6331
+ "name": "AvailableSpareInstanceCount",
6332
+ "shape": "AvailableSpareInstanceCount",
6333
+ "type": "integer",
6334
+ },
6335
+ {"name": "TotalUltraServerCount", "shape": "UltraServerCount", "type": "integer"},
6290
6336
  {"name": "TargetResources", "shape": "SageMakerResourceNames", "type": "list"},
6291
6337
  {
6292
6338
  "name": "ReservedCapacitySummaries",
@@ -8589,6 +8635,13 @@ SHAPE_DAG = {
8589
8635
  ],
8590
8636
  "type": "structure",
8591
8637
  },
8638
+ "InstancePlacementConfig": {
8639
+ "members": [
8640
+ {"name": "EnableMultipleJobs", "shape": "Boolean", "type": "boolean"},
8641
+ {"name": "PlacementSpecifications", "shape": "PlacementSpecifications", "type": "list"},
8642
+ ],
8643
+ "type": "structure",
8644
+ },
8592
8645
  "IntegerParameterRange": {
8593
8646
  "members": [
8594
8647
  {"name": "Name", "shape": "ParameterKey", "type": "string"},
@@ -10756,6 +10809,21 @@ SHAPE_DAG = {
10756
10809
  ],
10757
10810
  "type": "structure",
10758
10811
  },
10812
+ "ListUltraServersByReservedCapacityRequest": {
10813
+ "members": [
10814
+ {"name": "ReservedCapacityArn", "shape": "ReservedCapacityArn", "type": "string"},
10815
+ {"name": "MaxResults", "shape": "MaxResults", "type": "integer"},
10816
+ {"name": "NextToken", "shape": "NextToken", "type": "string"},
10817
+ ],
10818
+ "type": "structure",
10819
+ },
10820
+ "ListUltraServersByReservedCapacityResponse": {
10821
+ "members": [
10822
+ {"name": "NextToken", "shape": "NextToken", "type": "string"},
10823
+ {"name": "UltraServers", "shape": "UltraServers", "type": "list"},
10824
+ ],
10825
+ "type": "structure",
10826
+ },
10759
10827
  "ListUserProfilesRequest": {
10760
10828
  "members": [
10761
10829
  {"name": "NextToken", "shape": "NextToken", "type": "string"},
@@ -12800,6 +12868,18 @@ SHAPE_DAG = {
12800
12868
  "member_type": "structure",
12801
12869
  "type": "list",
12802
12870
  },
12871
+ "PlacementSpecification": {
12872
+ "members": [
12873
+ {"name": "UltraServerId", "shape": "String256", "type": "string"},
12874
+ {"name": "InstanceCount", "shape": "TrainingInstanceCount", "type": "integer"},
12875
+ ],
12876
+ "type": "structure",
12877
+ },
12878
+ "PlacementSpecifications": {
12879
+ "member_shape": "PlacementSpecification",
12880
+ "member_type": "structure",
12881
+ "type": "list",
12882
+ },
12803
12883
  "PredefinedMetricSpecification": {
12804
12884
  "members": [{"name": "PredefinedMetricType", "shape": "String", "type": "string"}],
12805
12885
  "type": "structure",
@@ -13737,6 +13817,9 @@ SHAPE_DAG = {
13737
13817
  },
13738
13818
  "ReservedCapacityOffering": {
13739
13819
  "members": [
13820
+ {"name": "ReservedCapacityType", "shape": "ReservedCapacityType", "type": "string"},
13821
+ {"name": "UltraServerType", "shape": "UltraServerType", "type": "string"},
13822
+ {"name": "UltraServerCount", "shape": "UltraServerCount", "type": "integer"},
13740
13823
  {"name": "InstanceType", "shape": "ReservedCapacityInstanceType", "type": "string"},
13741
13824
  {"name": "InstanceCount", "shape": "ReservedCapacityInstanceCount", "type": "integer"},
13742
13825
  {"name": "AvailabilityZone", "shape": "AvailabilityZone", "type": "string"},
@@ -13760,6 +13843,9 @@ SHAPE_DAG = {
13760
13843
  "ReservedCapacitySummary": {
13761
13844
  "members": [
13762
13845
  {"name": "ReservedCapacityArn", "shape": "ReservedCapacityArn", "type": "string"},
13846
+ {"name": "ReservedCapacityType", "shape": "ReservedCapacityType", "type": "string"},
13847
+ {"name": "UltraServerType", "shape": "UltraServerType", "type": "string"},
13848
+ {"name": "UltraServerCount", "shape": "UltraServerCount", "type": "integer"},
13763
13849
  {"name": "InstanceType", "shape": "ReservedCapacityInstanceType", "type": "string"},
13764
13850
  {"name": "TotalInstanceCount", "shape": "TotalInstanceCount", "type": "integer"},
13765
13851
  {"name": "Status", "shape": "ReservedCapacityStatus", "type": "string"},
@@ -13810,6 +13896,11 @@ SHAPE_DAG = {
13810
13896
  },
13811
13897
  {"name": "InstanceGroups", "shape": "InstanceGroups", "type": "list"},
13812
13898
  {"name": "TrainingPlanArn", "shape": "TrainingPlanArn", "type": "string"},
13899
+ {
13900
+ "name": "InstancePlacementConfig",
13901
+ "shape": "InstancePlacementConfig",
13902
+ "type": "structure",
13903
+ },
13813
13904
  ],
13814
13905
  "type": "structure",
13815
13906
  },
@@ -14132,6 +14223,8 @@ SHAPE_DAG = {
14132
14223
  "members": [
14133
14224
  {"name": "InstanceType", "shape": "ReservedCapacityInstanceType", "type": "string"},
14134
14225
  {"name": "InstanceCount", "shape": "ReservedCapacityInstanceCount", "type": "integer"},
14226
+ {"name": "UltraServerType", "shape": "UltraServerType", "type": "string"},
14227
+ {"name": "UltraServerCount", "shape": "UltraServerCount", "type": "integer"},
14135
14228
  {"name": "StartTimeAfter", "shape": "Timestamp", "type": "timestamp"},
14136
14229
  {"name": "EndTimeBefore", "shape": "Timestamp", "type": "timestamp"},
14137
14230
  {"name": "DurationHours", "shape": "TrainingPlanDurationHoursInput", "type": "long"},
@@ -15153,6 +15246,7 @@ SHAPE_DAG = {
15153
15246
  "type": "integer",
15154
15247
  },
15155
15248
  {"name": "InUseInstanceCount", "shape": "InUseInstanceCount", "type": "integer"},
15249
+ {"name": "TotalUltraServerCount", "shape": "UltraServerCount", "type": "integer"},
15156
15250
  {"name": "TargetResources", "shape": "SageMakerResourceNames", "type": "list"},
15157
15251
  {
15158
15252
  "name": "ReservedCapacitySummaries",
@@ -15552,6 +15646,61 @@ SHAPE_DAG = {
15552
15646
  ],
15553
15647
  "type": "structure",
15554
15648
  },
15649
+ "UltraServer": {
15650
+ "members": [
15651
+ {"name": "UltraServerId", "shape": "NonEmptyString256", "type": "string"},
15652
+ {"name": "UltraServerType", "shape": "UltraServerType", "type": "string"},
15653
+ {"name": "AvailabilityZone", "shape": "AvailabilityZone", "type": "string"},
15654
+ {"name": "InstanceType", "shape": "ReservedCapacityInstanceType", "type": "string"},
15655
+ {"name": "TotalInstanceCount", "shape": "TotalInstanceCount", "type": "integer"},
15656
+ {
15657
+ "name": "ConfiguredSpareInstanceCount",
15658
+ "shape": "ConfiguredSpareInstanceCount",
15659
+ "type": "integer",
15660
+ },
15661
+ {
15662
+ "name": "AvailableInstanceCount",
15663
+ "shape": "AvailableInstanceCount",
15664
+ "type": "integer",
15665
+ },
15666
+ {"name": "InUseInstanceCount", "shape": "InUseInstanceCount", "type": "integer"},
15667
+ {
15668
+ "name": "AvailableSpareInstanceCount",
15669
+ "shape": "AvailableSpareInstanceCount",
15670
+ "type": "integer",
15671
+ },
15672
+ {
15673
+ "name": "UnhealthyInstanceCount",
15674
+ "shape": "UnhealthyInstanceCount",
15675
+ "type": "integer",
15676
+ },
15677
+ {"name": "HealthStatus", "shape": "UltraServerHealthStatus", "type": "string"},
15678
+ ],
15679
+ "type": "structure",
15680
+ },
15681
+ "UltraServerInfo": {
15682
+ "members": [{"name": "Id", "shape": "String", "type": "string"}],
15683
+ "type": "structure",
15684
+ },
15685
+ "UltraServerSummary": {
15686
+ "members": [
15687
+ {"name": "UltraServerType", "shape": "UltraServerType", "type": "string"},
15688
+ {"name": "InstanceType", "shape": "ReservedCapacityInstanceType", "type": "string"},
15689
+ {"name": "UltraServerCount", "shape": "UltraServerCount", "type": "integer"},
15690
+ {
15691
+ "name": "AvailableSpareInstanceCount",
15692
+ "shape": "AvailableSpareInstanceCount",
15693
+ "type": "integer",
15694
+ },
15695
+ {
15696
+ "name": "UnhealthyInstanceCount",
15697
+ "shape": "UnhealthyInstanceCount",
15698
+ "type": "integer",
15699
+ },
15700
+ ],
15701
+ "type": "structure",
15702
+ },
15703
+ "UltraServers": {"member_shape": "UltraServer", "member_type": "structure", "type": "list"},
15555
15704
  "UnifiedStudioSettings": {
15556
15705
  "members": [
15557
15706
  {"name": "StudioWebPortalAccess", "shape": "FeatureStatus", "type": "string"},
@@ -3302,7 +3302,7 @@ class Cluster(Base):
3302
3302
  restricted_instance_groups: The specialized instance groups for training models like Amazon Nova to be created in the SageMaker HyperPod cluster.
3303
3303
  vpc_config: Specifies the Amazon Virtual Private Cloud (VPC) that is associated with the Amazon SageMaker HyperPod cluster. You can control access to and from your resources by configuring your VPC. For more information, see Give SageMaker access to resources in your Amazon VPC. When your Amazon VPC and subnets support IPv6, network communications differ based on the cluster orchestration platform: Slurm-orchestrated clusters automatically configure nodes with dual IPv6 and IPv4 addresses, allowing immediate IPv6 network communications. In Amazon EKS-orchestrated clusters, nodes receive dual-stack addressing, but pods can only use IPv6 when the Amazon EKS cluster is explicitly IPv6-enabled. For information about deploying an IPv6 Amazon EKS cluster, see Amazon EKS IPv6 Cluster Deployment. Additional resources for IPv6 configuration: For information about adding IPv6 support to your VPC, see to IPv6 Support for VPC. For information about creating a new IPv6-compatible VPC, see Amazon VPC Creation Guide. To configure SageMaker HyperPod with a custom Amazon VPC, see Custom Amazon VPC Setup for SageMaker HyperPod.
3304
3304
  tags: Custom tags for managing the SageMaker HyperPod cluster as an Amazon Web Services resource. You can add tags to your cluster in the same way you add them in other Amazon Web Services services that support tagging. To learn more about tagging Amazon Web Services resources in general, see Tagging Amazon Web Services Resources User Guide.
3305
- orchestrator: The type of orchestrator to use for the SageMaker HyperPod cluster. Currently, the only supported value is "eks", which is to use an Amazon Elastic Kubernetes Service (EKS) cluster as the orchestrator.
3305
+ orchestrator: The type of orchestrator to use for the SageMaker HyperPod cluster. Currently, the only supported value is "eks", which is to use an Amazon Elastic Kubernetes Service cluster as the orchestrator.
3306
3306
  node_recovery: The node recovery mode for the SageMaker HyperPod cluster. When set to Automatic, SageMaker HyperPod will automatically reboot or replace faulty nodes when issues are detected. When set to None, cluster administrators will need to manually manage any faulty cluster instances.
3307
3307
  node_provisioning_mode: The mode for provisioning nodes in the cluster. You can specify the following modes: Continuous: Scaling behavior that enables 1) concurrent operation execution within instance groups, 2) continuous retry mechanisms for failed operations, 3) enhanced customer visibility into cluster events through detailed event streams, 4) partial provisioning capabilities. Your clusters and instance groups remain InService while scaling. This mode is only supported for EKS orchestrated clusters.
3308
3308
  session: Boto3 session.
@@ -28891,6 +28891,9 @@ class TrainingPlan(Base):
28891
28891
  total_instance_count: The total number of instances reserved in this training plan.
28892
28892
  available_instance_count: The number of instances currently available for use in this training plan.
28893
28893
  in_use_instance_count: The number of instances currently in use from this training plan.
28894
+ unhealthy_instance_count: The number of instances in the training plan that are currently in an unhealthy state.
28895
+ available_spare_instance_count: The number of available spare instances in the training plan.
28896
+ total_ultra_server_count: The total number of UltraServers reserved to this training plan.
28894
28897
  target_resources: The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group.
28895
28898
  reserved_capacity_summaries: The list of Reserved Capacity providing the underlying compute resources of the plan.
28896
28899
 
@@ -28909,6 +28912,9 @@ class TrainingPlan(Base):
28909
28912
  total_instance_count: Optional[int] = Unassigned()
28910
28913
  available_instance_count: Optional[int] = Unassigned()
28911
28914
  in_use_instance_count: Optional[int] = Unassigned()
28915
+ unhealthy_instance_count: Optional[int] = Unassigned()
28916
+ available_spare_instance_count: Optional[int] = Unassigned()
28917
+ total_ultra_server_count: Optional[int] = Unassigned()
28912
28918
  target_resources: Optional[List[str]] = Unassigned()
28913
28919
  reserved_capacity_summaries: Optional[List[shapes.ReservedCapacitySummary]] = Unassigned()
28914
28920
 
@@ -28934,6 +28940,7 @@ class TrainingPlan(Base):
28934
28940
  cls,
28935
28941
  training_plan_name: str,
28936
28942
  training_plan_offering_id: str,
28943
+ spare_instance_count_per_ultra_server: Optional[int] = Unassigned(),
28937
28944
  tags: Optional[List[shapes.Tag]] = Unassigned(),
28938
28945
  session: Optional[Session] = None,
28939
28946
  region: Optional[str] = None,
@@ -28944,6 +28951,7 @@ class TrainingPlan(Base):
28944
28951
  Parameters:
28945
28952
  training_plan_name: The name of the training plan to create.
28946
28953
  training_plan_offering_id: The unique identifier of the training plan offering to use for creating this plan.
28954
+ spare_instance_count_per_ultra_server: Number of spare instances to reserve per UltraServer for enhanced resiliency. Default is 1.
28947
28955
  tags: An array of key-value pairs to apply to this training plan.
28948
28956
  session: Boto3 session.
28949
28957
  region: Region name.
@@ -28977,6 +28985,7 @@ class TrainingPlan(Base):
28977
28985
  operation_input_args = {
28978
28986
  "TrainingPlanName": training_plan_name,
28979
28987
  "TrainingPlanOfferingId": training_plan_offering_id,
28988
+ "SpareInstanceCountPerUltraServer": spare_instance_count_per_ultra_server,
28980
28989
  "Tags": tags,
28981
28990
  }
28982
28991
 
@@ -1020,6 +1020,36 @@ class InstanceGroup(Base):
1020
1020
  instance_group_name: str
1021
1021
 
1022
1022
 
1023
+ class PlacementSpecification(Base):
1024
+ """
1025
+ PlacementSpecification
1026
+ Specifies how instances should be placed on a specific UltraServer.
1027
+
1028
+ Attributes
1029
+ ----------------------
1030
+ ultra_server_id: The unique identifier of the UltraServer where instances should be placed.
1031
+ instance_count: The number of ML compute instances required to be placed together on the same UltraServer. Minimum value of 1.
1032
+ """
1033
+
1034
+ instance_count: int
1035
+ ultra_server_id: Optional[str] = Unassigned()
1036
+
1037
+
1038
+ class InstancePlacementConfig(Base):
1039
+ """
1040
+ InstancePlacementConfig
1041
+ Configuration for how instances are placed and allocated within UltraServers. This is only applicable for UltraServer capacity.
1042
+
1043
+ Attributes
1044
+ ----------------------
1045
+ enable_multiple_jobs: If set to true, allows multiple jobs to share the same UltraServer instances. If set to false, ensures this job's instances are placed on an UltraServer exclusively, with no other jobs sharing the same UltraServer. Default is false.
1046
+ placement_specifications: A list of specifications for how instances should be placed on specific UltraServers. Maximum of 10 items is supported.
1047
+ """
1048
+
1049
+ enable_multiple_jobs: Optional[bool] = Unassigned()
1050
+ placement_specifications: Optional[List[PlacementSpecification]] = Unassigned()
1051
+
1052
+
1023
1053
  class ResourceConfig(Base):
1024
1054
  """
1025
1055
  ResourceConfig
@@ -1034,6 +1064,7 @@ class ResourceConfig(Base):
1034
1064
  keep_alive_period_in_seconds: The duration of time in seconds to retain configured resources in a warm pool for subsequent training jobs.
1035
1065
  instance_groups: The configuration of a heterogeneous cluster in JSON format.
1036
1066
  training_plan_arn: The Amazon Resource Name (ARN); of the training plan to use for this resource configuration.
1067
+ instance_placement_config: Configuration for how training job instances are placed and allocated within UltraServers. Only applicable for UltraServer capacity.
1037
1068
  """
1038
1069
 
1039
1070
  volume_size_in_gb: int
@@ -1043,6 +1074,7 @@ class ResourceConfig(Base):
1043
1074
  keep_alive_period_in_seconds: Optional[int] = Unassigned()
1044
1075
  instance_groups: Optional[List[InstanceGroup]] = Unassigned()
1045
1076
  training_plan_arn: Optional[str] = Unassigned()
1077
+ instance_placement_config: Optional[InstancePlacementConfig] = Unassigned()
1046
1078
 
1047
1079
 
1048
1080
  class StoppingCondition(Base):
@@ -3390,13 +3422,13 @@ class ClusterEbsVolumeConfig(Base):
3390
3422
  class ClusterMetadata(Base):
3391
3423
  """
3392
3424
  ClusterMetadata
3393
- Metadata information about a SageMaker HyperPod cluster showing information about the cluster level operations, such as creating, updating, and deleting.
3425
+ Metadata information about a HyperPod cluster showing information about the cluster level operations, such as creating, updating, and deleting.
3394
3426
 
3395
3427
  Attributes
3396
3428
  ----------------------
3397
3429
  failure_message: An error message describing why the cluster level operation (such as creating, updating, or deleting) failed.
3398
- eks_role_access_entries: A list of Amazon EKS IAM role ARNs associated with the cluster. This is created by SageMaker HyperPod on your behalf and only applies for EKS-orchestrated clusters.
3399
- slr_access_entry: The Service-Linked Role (SLR) associated with the cluster. This is created by SageMaker HyperPod on your behalf and only applies for EKS-orchestrated clusters.
3430
+ eks_role_access_entries: A list of Amazon EKS IAM role ARNs associated with the cluster. This is created by HyperPod on your behalf and only applies for EKS orchestrated clusters.
3431
+ slr_access_entry: The Service-Linked Role (SLR) associated with the cluster. This is created by HyperPod on your behalf and only applies for EKS orchestrated clusters.
3400
3432
  """
3401
3433
 
3402
3434
  failure_message: Optional[str] = Unassigned()
@@ -3407,7 +3439,7 @@ class ClusterMetadata(Base):
3407
3439
  class InstanceGroupMetadata(Base):
3408
3440
  """
3409
3441
  InstanceGroupMetadata
3410
- Metadata information about an instance group in a SageMaker HyperPod cluster.
3442
+ Metadata information about an instance group in a HyperPod cluster.
3411
3443
 
3412
3444
  Attributes
3413
3445
  ----------------------
@@ -3507,11 +3539,11 @@ class ClusterEventDetail(Base):
3507
3539
  Attributes
3508
3540
  ----------------------
3509
3541
  event_id: The unique identifier (UUID) of the event.
3510
- cluster_arn: The Amazon Resource Name (ARN) of the SageMaker HyperPod cluster associated with the event.
3511
- cluster_name: The name of the SageMaker HyperPod cluster associated with the event.
3542
+ cluster_arn: The Amazon Resource Name (ARN) of the HyperPod cluster associated with the event.
3543
+ cluster_name: The name of the HyperPod cluster associated with the event.
3512
3544
  instance_group_name: The name of the instance group associated with the event, if applicable.
3513
3545
  instance_id: The EC2 instance ID associated with the event, if applicable.
3514
- resource_type: The type of resource associated with the event. Valid values are "Cluster", "InstanceGroup", or "Instance".
3546
+ resource_type: The type of resource associated with the event. Valid values are Cluster, InstanceGroup, or Instance.
3515
3547
  event_time: The timestamp when the event occurred.
3516
3548
  event_details: Additional details about the event, including event-specific metadata.
3517
3549
  description: A human-readable description of the event.
@@ -3531,16 +3563,16 @@ class ClusterEventDetail(Base):
3531
3563
  class ClusterEventSummary(Base):
3532
3564
  """
3533
3565
  ClusterEventSummary
3534
- A summary of an event in a SageMaker HyperPod cluster.
3566
+ A summary of an event in a HyperPod cluster.
3535
3567
 
3536
3568
  Attributes
3537
3569
  ----------------------
3538
3570
  event_id: The unique identifier (UUID) of the event.
3539
- cluster_arn: The Amazon Resource Name (ARN) of the SageMaker HyperPod cluster associated with the event.
3540
- cluster_name: The name of the SageMaker HyperPod cluster associated with the event.
3571
+ cluster_arn: The Amazon Resource Name (ARN) of the HyperPod cluster associated with the event.
3572
+ cluster_name: The name of the HyperPod cluster associated with the event.
3541
3573
  instance_group_name: The name of the instance group associated with the event, if applicable.
3542
- instance_id: The EC2 instance ID associated with the event, if applicable.
3543
- resource_type: The type of resource associated with the event. Valid values are "Cluster", "InstanceGroup", or "Instance".
3574
+ instance_id: The Amazon Elastic Compute Cloud (EC2) instance ID associated with the event, if applicable.
3575
+ resource_type: The type of resource associated with the event. Valid values are Cluster, InstanceGroup, or Instance.
3544
3576
  event_time: The timestamp when the event occurred.
3545
3577
  description: A brief, human-readable description of the event.
3546
3578
  """
@@ -3738,6 +3770,19 @@ class ClusterInstanceStatusDetails(Base):
3738
3770
  message: Optional[str] = Unassigned()
3739
3771
 
3740
3772
 
3773
+ class UltraServerInfo(Base):
3774
+ """
3775
+ UltraServerInfo
3776
+ Contains information about the UltraServer object.
3777
+
3778
+ Attributes
3779
+ ----------------------
3780
+ id: The unique identifier of the UltraServer.
3781
+ """
3782
+
3783
+ id: Optional[str] = Unassigned()
3784
+
3785
+
3741
3786
  class ClusterNodeDetails(Base):
3742
3787
  """
3743
3788
  ClusterNodeDetails
@@ -3762,6 +3807,7 @@ class ClusterNodeDetails(Base):
3762
3807
  placement: The placement details of the SageMaker HyperPod cluster node.
3763
3808
  current_image_id: The ID of the Amazon Machine Image (AMI) currently in use by the node.
3764
3809
  desired_image_id: The ID of the Amazon Machine Image (AMI) desired for the node.
3810
+ ultra_server_info: Contains information about the UltraServer.
3765
3811
  """
3766
3812
 
3767
3813
  instance_group_name: Optional[str] = Unassigned()
@@ -3781,6 +3827,7 @@ class ClusterNodeDetails(Base):
3781
3827
  placement: Optional[ClusterInstancePlacement] = Unassigned()
3782
3828
  current_image_id: Optional[str] = Unassigned()
3783
3829
  desired_image_id: Optional[str] = Unassigned()
3830
+ ultra_server_info: Optional[UltraServerInfo] = Unassigned()
3784
3831
 
3785
3832
 
3786
3833
  class ClusterNodeSummary(Base):
@@ -3797,6 +3844,7 @@ class ClusterNodeSummary(Base):
3797
3844
  launch_time: The time when the instance is launched.
3798
3845
  last_software_update_time: The time when SageMaker last updated the software of the instances in the cluster.
3799
3846
  instance_status: The status of the instance.
3847
+ ultra_server_info: Contains information about the UltraServer.
3800
3848
  """
3801
3849
 
3802
3850
  instance_group_name: str
@@ -3806,6 +3854,7 @@ class ClusterNodeSummary(Base):
3806
3854
  instance_status: ClusterInstanceStatusDetails
3807
3855
  node_logical_id: Optional[str] = Unassigned()
3808
3856
  last_software_update_time: Optional[datetime.datetime] = Unassigned()
3857
+ ultra_server_info: Optional[UltraServerInfo] = Unassigned()
3809
3858
 
3810
3859
 
3811
3860
  class ClusterOrchestratorEksConfig(Base):
@@ -9638,6 +9687,27 @@ class TemplateProviderDetail(Base):
9638
9687
  cfn_template_provider_detail: Optional[CfnTemplateProviderDetail] = Unassigned()
9639
9688
 
9640
9689
 
9690
+ class UltraServerSummary(Base):
9691
+ """
9692
+ UltraServerSummary
9693
+ A summary of UltraServer resources and their current status.
9694
+
9695
+ Attributes
9696
+ ----------------------
9697
+ ultra_server_type: The type of UltraServer, such as ml.u-p6e-gb200x72.
9698
+ instance_type: The Amazon EC2 instance type used in the UltraServer.
9699
+ ultra_server_count: The number of UltraServers of this type.
9700
+ available_spare_instance_count: The number of available spare instances in the UltraServers.
9701
+ unhealthy_instance_count: The total number of instances across all UltraServers of this type that are currently in an unhealthy state.
9702
+ """
9703
+
9704
+ ultra_server_type: str
9705
+ instance_type: str
9706
+ ultra_server_count: Optional[int] = Unassigned()
9707
+ available_spare_instance_count: Optional[int] = Unassigned()
9708
+ unhealthy_instance_count: Optional[int] = Unassigned()
9709
+
9710
+
9641
9711
  class SubscribedWorkteam(Base):
9642
9712
  """
9643
9713
  SubscribedWorkteam
@@ -9741,6 +9811,9 @@ class ReservedCapacitySummary(Base):
9741
9811
  Attributes
9742
9812
  ----------------------
9743
9813
  reserved_capacity_arn: The Amazon Resource Name (ARN) of the reserved capacity.
9814
+ reserved_capacity_type: The type of reserved capacity.
9815
+ ultra_server_type: The type of UltraServer included in this reserved capacity, such as ml.u-p6e-gb200x72.
9816
+ ultra_server_count: The number of UltraServers included in this reserved capacity.
9744
9817
  instance_type: The instance type for the reserved capacity.
9745
9818
  total_instance_count: The total number of instances in the reserved capacity.
9746
9819
  status: The current status of the reserved capacity.
@@ -9755,6 +9828,9 @@ class ReservedCapacitySummary(Base):
9755
9828
  instance_type: str
9756
9829
  total_instance_count: int
9757
9830
  status: str
9831
+ reserved_capacity_type: Optional[str] = Unassigned()
9832
+ ultra_server_type: Optional[str] = Unassigned()
9833
+ ultra_server_count: Optional[int] = Unassigned()
9758
9834
  availability_zone: Optional[str] = Unassigned()
9759
9835
  duration_hours: Optional[int] = Unassigned()
9760
9836
  duration_minutes: Optional[int] = Unassigned()
@@ -12207,6 +12283,7 @@ class TrainingPlanSummary(Base):
12207
12283
  total_instance_count: The total number of instances reserved in this training plan.
12208
12284
  available_instance_count: The number of instances currently available for use in this training plan.
12209
12285
  in_use_instance_count: The number of instances currently in use from this training plan.
12286
+ total_ultra_server_count: The total number of UltraServers allocated to this training plan.
12210
12287
  target_resources: The target resources (e.g., training jobs, HyperPod clusters) that can use this training plan. Training plans are specific to their target resource. A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs. A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group.
12211
12288
  reserved_capacity_summaries: A list of reserved capacities associated with this training plan, including details such as instance types, counts, and availability zones.
12212
12289
  """
@@ -12224,6 +12301,7 @@ class TrainingPlanSummary(Base):
12224
12301
  total_instance_count: Optional[int] = Unassigned()
12225
12302
  available_instance_count: Optional[int] = Unassigned()
12226
12303
  in_use_instance_count: Optional[int] = Unassigned()
12304
+ total_ultra_server_count: Optional[int] = Unassigned()
12227
12305
  target_resources: Optional[List[str]] = Unassigned()
12228
12306
  reserved_capacity_summaries: Optional[List[ReservedCapacitySummary]] = Unassigned()
12229
12307
 
@@ -12309,6 +12387,39 @@ class TrialSummary(Base):
12309
12387
  last_modified_time: Optional[datetime.datetime] = Unassigned()
12310
12388
 
12311
12389
 
12390
+ class UltraServer(Base):
12391
+ """
12392
+ UltraServer
12393
+ Represents a high-performance compute server used for distributed training in SageMaker AI. An UltraServer consists of multiple instances within a shared NVLink interconnect domain.
12394
+
12395
+ Attributes
12396
+ ----------------------
12397
+ ultra_server_id: The unique identifier for the UltraServer.
12398
+ ultra_server_type: The type of UltraServer, such as ml.u-p6e-gb200x72.
12399
+ availability_zone: The name of the Availability Zone where the UltraServer is provisioned.
12400
+ instance_type: The Amazon EC2 instance type used in the UltraServer.
12401
+ total_instance_count: The total number of instances in this UltraServer.
12402
+ configured_spare_instance_count: The number of spare instances configured for this UltraServer to provide enhanced resiliency.
12403
+ available_instance_count: The number of instances currently available for use in this UltraServer.
12404
+ in_use_instance_count: The number of instances currently in use in this UltraServer.
12405
+ available_spare_instance_count: The number of available spare instances in the UltraServer.
12406
+ unhealthy_instance_count: The number of instances in this UltraServer that are currently in an unhealthy state.
12407
+ health_status: The overall health status of the UltraServer.
12408
+ """
12409
+
12410
+ ultra_server_id: str
12411
+ ultra_server_type: str
12412
+ availability_zone: str
12413
+ instance_type: str
12414
+ total_instance_count: int
12415
+ configured_spare_instance_count: Optional[int] = Unassigned()
12416
+ available_instance_count: Optional[int] = Unassigned()
12417
+ in_use_instance_count: Optional[int] = Unassigned()
12418
+ available_spare_instance_count: Optional[int] = Unassigned()
12419
+ unhealthy_instance_count: Optional[int] = Unassigned()
12420
+ health_status: Optional[str] = Unassigned()
12421
+
12422
+
12312
12423
  class UserProfileDetails(Base):
12313
12424
  """
12314
12425
  UserProfileDetails
@@ -13028,6 +13139,9 @@ class ReservedCapacityOffering(Base):
13028
13139
 
13029
13140
  Attributes
13030
13141
  ----------------------
13142
+ reserved_capacity_type: The type of reserved capacity offering.
13143
+ ultra_server_type: The type of UltraServer included in this reserved capacity offering, such as ml.u-p6e-gb200x72.
13144
+ ultra_server_count: The number of UltraServers included in this reserved capacity offering.
13031
13145
  instance_type: The instance type for the reserved capacity offering.
13032
13146
  instance_count: The number of instances in the reserved capacity offering.
13033
13147
  availability_zone: The availability zone for the reserved capacity offering.
@@ -13039,6 +13153,9 @@ class ReservedCapacityOffering(Base):
13039
13153
 
13040
13154
  instance_type: str
13041
13155
  instance_count: int
13156
+ reserved_capacity_type: Optional[str] = Unassigned()
13157
+ ultra_server_type: Optional[str] = Unassigned()
13158
+ ultra_server_count: Optional[int] = Unassigned()
13042
13159
  availability_zone: Optional[str] = Unassigned()
13043
13160
  duration_hours: Optional[int] = Unassigned()
13044
13161
  duration_minutes: Optional[int] = Unassigned()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sagemaker-core
3
- Version: 1.0.49
3
+ Version: 1.0.50
4
4
  Summary: A Python package for SageMaker core functionalities
5
5
  Author-email: AWS <sagemaker-interests@amazon.com>
6
6
  Project-URL: Repository, https://github.com/aws/sagemaker-core.git
@@ -1 +0,0 @@
1
- 1.0.49
File without changes