aws-sdk-sagemaker 1.356.0 → 1.357.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3f0b9d6eec27903cf17198c529d6a3afafb0173ccb84d4975f8c44b4fe354d52
4
- data.tar.gz: cd19f6247b5bd223b05f1a73e34aae8527c0e069257b24fb9fccc233b6579959
3
+ metadata.gz: 7f316e630f07094ff5167bc4cc33094f38cd54a4bc12ab9115ed3b92ac53a187
4
+ data.tar.gz: c769b634cb5498cac7f8cbff132d978204d47b44d8ca9a0f374984fa1f3fd4f1
5
5
  SHA512:
6
- metadata.gz: e25ef94ab127188f40f966af619066bf328b81f18c9906bf6aa0cb30ec43263fd153be41519867f22561614247d58d55ec1e5a66faf23469bf09d33c213a2075
7
- data.tar.gz: 2fd039d0bedadee4e5edc2b58af02e39bce099b81c676498348d541a585d8738adaa557c7f0ceb2395332ce1db1813f57bd66a7acaf005f13795d6171bd57a5a
6
+ metadata.gz: d23dbe4e149951a9edb1418bcce5b9e0c730a138f422ad6f63dcdb4469f8dadc14f754e570133107a9af62a8db737a3d0de69428d0892b23de324e7338875bce
7
+ data.tar.gz: 5d47d51c8ae0d509a97c0ee14d65090b0252bfc52d40efc37f314b7da7e590503ff44e2fa1a2d6deb10124342be8a6e672bae327204ed4a5d04da0eebedf0147
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Unreleased Changes
2
2
  ------------------
3
3
 
4
+ 1.357.0 (2026-03-30)
5
+ ------------------
6
+
7
+ * Feature - Added support for placement strategy and consolidation for SageMaker inference component endpoints. Customers can now configure how inference component copies are distributed across instances and availability zones (AZs), and enable automatic consolidation to optimize resource utilization.
8
+
4
9
  1.356.0 (2026-03-26)
5
10
  ------------------
6
11
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.356.0
1
+ 1.357.0
@@ -4412,6 +4412,11 @@ module Aws::SageMaker
4412
4412
  # status: "ENABLED", # accepts ENABLED, DISABLED
4413
4413
  # min_instance_count: 1,
4414
4414
  # max_instance_count: 1,
4415
+ # scale_in_policy: {
4416
+ # strategy: "IDLE_RELEASE", # required, accepts IDLE_RELEASE, CONSOLIDATION
4417
+ # maximum_step_size: 1,
4418
+ # cooldown_in_minutes: 1,
4419
+ # },
4415
4420
  # },
4416
4421
  # routing_config: {
4417
4422
  # routing_strategy: "LEAST_OUTSTANDING_REQUESTS", # required, accepts LEAST_OUTSTANDING_REQUESTS, RANDOM
@@ -4517,6 +4522,11 @@ module Aws::SageMaker
4517
4522
  # status: "ENABLED", # accepts ENABLED, DISABLED
4518
4523
  # min_instance_count: 1,
4519
4524
  # max_instance_count: 1,
4525
+ # scale_in_policy: {
4526
+ # strategy: "IDLE_RELEASE", # required, accepts IDLE_RELEASE, CONSOLIDATION
4527
+ # maximum_step_size: 1,
4528
+ # cooldown_in_minutes: 1,
4529
+ # },
4520
4530
  # },
4521
4531
  # routing_config: {
4522
4532
  # routing_strategy: "LEAST_OUTSTANDING_REQUESTS", # required, accepts LEAST_OUTSTANDING_REQUESTS, RANDOM
@@ -5963,6 +5973,13 @@ module Aws::SageMaker
5963
5973
  # data_cache_config: {
5964
5974
  # enable_caching: false, # required
5965
5975
  # },
5976
+ # scheduling_config: {
5977
+ # placement_strategy: "SPREAD", # required, accepts SPREAD, BINPACK
5978
+ # availability_zone_balance: {
5979
+ # enforcement_mode: "PERMISSIVE", # required, accepts PERMISSIVE
5980
+ # max_imbalance: 1,
5981
+ # },
5982
+ # },
5966
5983
  # },
5967
5984
  # runtime_config: {
5968
5985
  # copy_count: 1, # required
@@ -15256,6 +15273,9 @@ module Aws::SageMaker
15256
15273
  # resp.production_variants[0].managed_instance_scaling.status #=> String, one of "ENABLED", "DISABLED"
15257
15274
  # resp.production_variants[0].managed_instance_scaling.min_instance_count #=> Integer
15258
15275
  # resp.production_variants[0].managed_instance_scaling.max_instance_count #=> Integer
15276
+ # resp.production_variants[0].managed_instance_scaling.scale_in_policy.strategy #=> String, one of "IDLE_RELEASE", "CONSOLIDATION"
15277
+ # resp.production_variants[0].managed_instance_scaling.scale_in_policy.maximum_step_size #=> Integer
15278
+ # resp.production_variants[0].managed_instance_scaling.scale_in_policy.cooldown_in_minutes #=> Integer
15259
15279
  # resp.production_variants[0].routing_config.routing_strategy #=> String, one of "LEAST_OUTSTANDING_REQUESTS", "RANDOM"
15260
15280
  # resp.production_variants[0].capacity_reservation_config.ml_reservation_arn #=> String
15261
15281
  # resp.production_variants[0].capacity_reservation_config.capacity_reservation_preference #=> String, one of "capacity-reservations-only"
@@ -15326,6 +15346,9 @@ module Aws::SageMaker
15326
15346
  # resp.pending_deployment_summary.production_variants[0].managed_instance_scaling.status #=> String, one of "ENABLED", "DISABLED"
15327
15347
  # resp.pending_deployment_summary.production_variants[0].managed_instance_scaling.min_instance_count #=> Integer
15328
15348
  # resp.pending_deployment_summary.production_variants[0].managed_instance_scaling.max_instance_count #=> Integer
15349
+ # resp.pending_deployment_summary.production_variants[0].managed_instance_scaling.scale_in_policy.strategy #=> String, one of "IDLE_RELEASE", "CONSOLIDATION"
15350
+ # resp.pending_deployment_summary.production_variants[0].managed_instance_scaling.scale_in_policy.maximum_step_size #=> Integer
15351
+ # resp.pending_deployment_summary.production_variants[0].managed_instance_scaling.scale_in_policy.cooldown_in_minutes #=> Integer
15329
15352
  # resp.pending_deployment_summary.production_variants[0].routing_config.routing_strategy #=> String, one of "LEAST_OUTSTANDING_REQUESTS", "RANDOM"
15330
15353
  # resp.pending_deployment_summary.start_time #=> Time
15331
15354
  # resp.pending_deployment_summary.shadow_production_variants #=> Array
@@ -15353,6 +15376,9 @@ module Aws::SageMaker
15353
15376
  # resp.pending_deployment_summary.shadow_production_variants[0].managed_instance_scaling.status #=> String, one of "ENABLED", "DISABLED"
15354
15377
  # resp.pending_deployment_summary.shadow_production_variants[0].managed_instance_scaling.min_instance_count #=> Integer
15355
15378
  # resp.pending_deployment_summary.shadow_production_variants[0].managed_instance_scaling.max_instance_count #=> Integer
15379
+ # resp.pending_deployment_summary.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.strategy #=> String, one of "IDLE_RELEASE", "CONSOLIDATION"
15380
+ # resp.pending_deployment_summary.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.maximum_step_size #=> Integer
15381
+ # resp.pending_deployment_summary.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.cooldown_in_minutes #=> Integer
15356
15382
  # resp.pending_deployment_summary.shadow_production_variants[0].routing_config.routing_strategy #=> String, one of "LEAST_OUTSTANDING_REQUESTS", "RANDOM"
15357
15383
  # resp.explainer_config.clarify_explainer_config.enable_explanations #=> String
15358
15384
  # resp.explainer_config.clarify_explainer_config.inference_config.features_attribute #=> String
@@ -15400,6 +15426,9 @@ module Aws::SageMaker
15400
15426
  # resp.shadow_production_variants[0].managed_instance_scaling.status #=> String, one of "ENABLED", "DISABLED"
15401
15427
  # resp.shadow_production_variants[0].managed_instance_scaling.min_instance_count #=> Integer
15402
15428
  # resp.shadow_production_variants[0].managed_instance_scaling.max_instance_count #=> Integer
15429
+ # resp.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.strategy #=> String, one of "IDLE_RELEASE", "CONSOLIDATION"
15430
+ # resp.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.maximum_step_size #=> Integer
15431
+ # resp.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.cooldown_in_minutes #=> Integer
15403
15432
  # resp.shadow_production_variants[0].routing_config.routing_strategy #=> String, one of "LEAST_OUTSTANDING_REQUESTS", "RANDOM"
15404
15433
  # resp.shadow_production_variants[0].capacity_reservation_config.ml_reservation_arn #=> String
15405
15434
  # resp.shadow_production_variants[0].capacity_reservation_config.capacity_reservation_preference #=> String, one of "capacity-reservations-only"
@@ -15480,6 +15509,9 @@ module Aws::SageMaker
15480
15509
  # resp.production_variants[0].managed_instance_scaling.status #=> String, one of "ENABLED", "DISABLED"
15481
15510
  # resp.production_variants[0].managed_instance_scaling.min_instance_count #=> Integer
15482
15511
  # resp.production_variants[0].managed_instance_scaling.max_instance_count #=> Integer
15512
+ # resp.production_variants[0].managed_instance_scaling.scale_in_policy.strategy #=> String, one of "IDLE_RELEASE", "CONSOLIDATION"
15513
+ # resp.production_variants[0].managed_instance_scaling.scale_in_policy.maximum_step_size #=> Integer
15514
+ # resp.production_variants[0].managed_instance_scaling.scale_in_policy.cooldown_in_minutes #=> Integer
15483
15515
  # resp.production_variants[0].routing_config.routing_strategy #=> String, one of "LEAST_OUTSTANDING_REQUESTS", "RANDOM"
15484
15516
  # resp.production_variants[0].inference_ami_version #=> String, one of "al2-ami-sagemaker-inference-gpu-2", "al2-ami-sagemaker-inference-gpu-2-1", "al2-ami-sagemaker-inference-gpu-3-1", "al2-ami-sagemaker-inference-neuron-2", "al2023-ami-sagemaker-inference-gpu-4-1"
15485
15517
  # resp.production_variants[0].capacity_reservation_config.capacity_reservation_preference #=> String, one of "capacity-reservations-only"
@@ -15546,6 +15578,9 @@ module Aws::SageMaker
15546
15578
  # resp.shadow_production_variants[0].managed_instance_scaling.status #=> String, one of "ENABLED", "DISABLED"
15547
15579
  # resp.shadow_production_variants[0].managed_instance_scaling.min_instance_count #=> Integer
15548
15580
  # resp.shadow_production_variants[0].managed_instance_scaling.max_instance_count #=> Integer
15581
+ # resp.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.strategy #=> String, one of "IDLE_RELEASE", "CONSOLIDATION"
15582
+ # resp.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.maximum_step_size #=> Integer
15583
+ # resp.shadow_production_variants[0].managed_instance_scaling.scale_in_policy.cooldown_in_minutes #=> Integer
15549
15584
  # resp.shadow_production_variants[0].routing_config.routing_strategy #=> String, one of "LEAST_OUTSTANDING_REQUESTS", "RANDOM"
15550
15585
  # resp.shadow_production_variants[0].inference_ami_version #=> String, one of "al2-ami-sagemaker-inference-gpu-2", "al2-ami-sagemaker-inference-gpu-2-1", "al2-ami-sagemaker-inference-gpu-3-1", "al2-ami-sagemaker-inference-neuron-2", "al2023-ami-sagemaker-inference-gpu-4-1"
15551
15586
  # resp.shadow_production_variants[0].capacity_reservation_config.capacity_reservation_preference #=> String, one of "capacity-reservations-only"
@@ -16470,6 +16505,9 @@ module Aws::SageMaker
16470
16505
  # resp.specification.compute_resource_requirements.max_memory_required_in_mb #=> Integer
16471
16506
  # resp.specification.base_inference_component_name #=> String
16472
16507
  # resp.specification.data_cache_config.enable_caching #=> Boolean
16508
+ # resp.specification.scheduling_config.placement_strategy #=> String, one of "SPREAD", "BINPACK"
16509
+ # resp.specification.scheduling_config.availability_zone_balance.enforcement_mode #=> String, one of "PERMISSIVE"
16510
+ # resp.specification.scheduling_config.availability_zone_balance.max_imbalance #=> Integer
16473
16511
  # resp.runtime_config.desired_copy_count #=> Integer
16474
16512
  # resp.runtime_config.current_copy_count #=> Integer
16475
16513
  # resp.creation_time #=> Time
@@ -30312,6 +30350,13 @@ module Aws::SageMaker
30312
30350
  # data_cache_config: {
30313
30351
  # enable_caching: false, # required
30314
30352
  # },
30353
+ # scheduling_config: {
30354
+ # placement_strategy: "SPREAD", # required, accepts SPREAD, BINPACK
30355
+ # availability_zone_balance: {
30356
+ # enforcement_mode: "PERMISSIVE", # required, accepts PERMISSIVE
30357
+ # max_imbalance: 1,
30358
+ # },
30359
+ # },
30315
30360
  # },
30316
30361
  # runtime_config: {
30317
30362
  # copy_count: 1, # required
@@ -32495,7 +32540,7 @@ module Aws::SageMaker
32495
32540
  tracer: tracer
32496
32541
  )
32497
32542
  context[:gem_name] = 'aws-sdk-sagemaker'
32498
- context[:gem_version] = '1.356.0'
32543
+ context[:gem_version] = '1.357.0'
32499
32544
  Seahorse::Client::Request.new(handlers, context)
32500
32545
  end
32501
32546
 
@@ -194,6 +194,8 @@ module Aws::SageMaker
194
194
  Autotune = Shapes::StructureShape.new(name: 'Autotune')
195
195
  AutotuneMode = Shapes::StringShape.new(name: 'AutotuneMode')
196
196
  AvailabilityZone = Shapes::StringShape.new(name: 'AvailabilityZone')
197
+ AvailabilityZoneBalanceEnforcementMode = Shapes::StringShape.new(name: 'AvailabilityZoneBalanceEnforcementMode')
198
+ AvailabilityZoneBalanceMaxImbalance = Shapes::IntegerShape.new(name: 'AvailabilityZoneBalanceMaxImbalance')
197
199
  AvailabilityZoneId = Shapes::StringShape.new(name: 'AvailabilityZoneId')
198
200
  AvailableInstanceCount = Shapes::IntegerShape.new(name: 'AvailableInstanceCount')
199
201
  AvailableSpareInstanceCount = Shapes::IntegerShape.new(name: 'AvailableSpareInstanceCount')
@@ -1332,6 +1334,7 @@ module Aws::SageMaker
1332
1334
  InUseInstanceCount = Shapes::IntegerShape.new(name: 'InUseInstanceCount')
1333
1335
  IncludeNodeLogicalIdsBoolean = Shapes::BooleanShape.new(name: 'IncludeNodeLogicalIdsBoolean')
1334
1336
  InferenceComponentArn = Shapes::StringShape.new(name: 'InferenceComponentArn')
1337
+ InferenceComponentAvailabilityZoneBalance = Shapes::StructureShape.new(name: 'InferenceComponentAvailabilityZoneBalance')
1335
1338
  InferenceComponentCapacitySize = Shapes::StructureShape.new(name: 'InferenceComponentCapacitySize')
1336
1339
  InferenceComponentCapacitySizeType = Shapes::StringShape.new(name: 'InferenceComponentCapacitySizeType')
1337
1340
  InferenceComponentComputeResourceRequirements = Shapes::StructureShape.new(name: 'InferenceComponentComputeResourceRequirements')
@@ -1344,9 +1347,11 @@ module Aws::SageMaker
1344
1347
  InferenceComponentMetadata = Shapes::StructureShape.new(name: 'InferenceComponentMetadata')
1345
1348
  InferenceComponentName = Shapes::StringShape.new(name: 'InferenceComponentName')
1346
1349
  InferenceComponentNameContains = Shapes::StringShape.new(name: 'InferenceComponentNameContains')
1350
+ InferenceComponentPlacementStrategy = Shapes::StringShape.new(name: 'InferenceComponentPlacementStrategy')
1347
1351
  InferenceComponentRollingUpdatePolicy = Shapes::StructureShape.new(name: 'InferenceComponentRollingUpdatePolicy')
1348
1352
  InferenceComponentRuntimeConfig = Shapes::StructureShape.new(name: 'InferenceComponentRuntimeConfig')
1349
1353
  InferenceComponentRuntimeConfigSummary = Shapes::StructureShape.new(name: 'InferenceComponentRuntimeConfigSummary')
1354
+ InferenceComponentSchedulingConfig = Shapes::StructureShape.new(name: 'InferenceComponentSchedulingConfig')
1350
1355
  InferenceComponentSortKey = Shapes::StringShape.new(name: 'InferenceComponentSortKey')
1351
1356
  InferenceComponentSpecification = Shapes::StructureShape.new(name: 'InferenceComponentSpecification')
1352
1357
  InferenceComponentSpecificationSummary = Shapes::StructureShape.new(name: 'InferenceComponentSpecificationSummary')
@@ -1660,8 +1665,11 @@ module Aws::SageMaker
1660
1665
  MLflowConfiguration = Shapes::StructureShape.new(name: 'MLflowConfiguration')
1661
1666
  MaintenanceStatus = Shapes::StringShape.new(name: 'MaintenanceStatus')
1662
1667
  MajorMinorVersion = Shapes::StringShape.new(name: 'MajorMinorVersion')
1668
+ ManagedInstanceScalingCooldownInMinutes = Shapes::IntegerShape.new(name: 'ManagedInstanceScalingCooldownInMinutes')
1663
1669
  ManagedInstanceScalingMaxInstanceCount = Shapes::IntegerShape.new(name: 'ManagedInstanceScalingMaxInstanceCount')
1670
+ ManagedInstanceScalingMaximumStepSize = Shapes::IntegerShape.new(name: 'ManagedInstanceScalingMaximumStepSize')
1664
1671
  ManagedInstanceScalingMinInstanceCount = Shapes::IntegerShape.new(name: 'ManagedInstanceScalingMinInstanceCount')
1672
+ ManagedInstanceScalingScaleInStrategy = Shapes::StringShape.new(name: 'ManagedInstanceScalingScaleInStrategy')
1665
1673
  ManagedInstanceScalingStatus = Shapes::StringShape.new(name: 'ManagedInstanceScalingStatus')
1666
1674
  MapString2048 = Shapes::MapShape.new(name: 'MapString2048')
1667
1675
  MaxAutoMLJobRuntimeInSeconds = Shapes::IntegerShape.new(name: 'MaxAutoMLJobRuntimeInSeconds')
@@ -2115,6 +2123,7 @@ module Aws::SageMaker
2115
2123
  ProductionVariantInstanceType = Shapes::StringShape.new(name: 'ProductionVariantInstanceType')
2116
2124
  ProductionVariantList = Shapes::ListShape.new(name: 'ProductionVariantList')
2117
2125
  ProductionVariantManagedInstanceScaling = Shapes::StructureShape.new(name: 'ProductionVariantManagedInstanceScaling')
2126
+ ProductionVariantManagedInstanceScalingScaleInPolicy = Shapes::StructureShape.new(name: 'ProductionVariantManagedInstanceScalingScaleInPolicy')
2118
2127
  ProductionVariantModelDataDownloadTimeoutInSeconds = Shapes::IntegerShape.new(name: 'ProductionVariantModelDataDownloadTimeoutInSeconds')
2119
2128
  ProductionVariantRoutingConfig = Shapes::StructureShape.new(name: 'ProductionVariantRoutingConfig')
2120
2129
  ProductionVariantSSMAccess = Shapes::BooleanShape.new(name: 'ProductionVariantSSMAccess')
@@ -7683,6 +7692,10 @@ module Aws::SageMaker
7683
7692
  ImportHubContentResponse.add_member(:hub_content_arn, Shapes::ShapeRef.new(shape: HubContentArn, required: true, location_name: "HubContentArn"))
7684
7693
  ImportHubContentResponse.struct_class = Types::ImportHubContentResponse
7685
7694
 
7695
+ InferenceComponentAvailabilityZoneBalance.add_member(:enforcement_mode, Shapes::ShapeRef.new(shape: AvailabilityZoneBalanceEnforcementMode, required: true, location_name: "EnforcementMode"))
7696
+ InferenceComponentAvailabilityZoneBalance.add_member(:max_imbalance, Shapes::ShapeRef.new(shape: AvailabilityZoneBalanceMaxImbalance, location_name: "MaxImbalance"))
7697
+ InferenceComponentAvailabilityZoneBalance.struct_class = Types::InferenceComponentAvailabilityZoneBalance
7698
+
7686
7699
  InferenceComponentCapacitySize.add_member(:type, Shapes::ShapeRef.new(shape: InferenceComponentCapacitySizeType, required: true, location_name: "Type"))
7687
7700
  InferenceComponentCapacitySize.add_member(:value, Shapes::ShapeRef.new(shape: CapacitySizeValue, required: true, location_name: "Value"))
7688
7701
  InferenceComponentCapacitySize.struct_class = Types::InferenceComponentCapacitySize
@@ -7729,12 +7742,17 @@ module Aws::SageMaker
7729
7742
  InferenceComponentRuntimeConfigSummary.add_member(:current_copy_count, Shapes::ShapeRef.new(shape: InferenceComponentCopyCount, location_name: "CurrentCopyCount"))
7730
7743
  InferenceComponentRuntimeConfigSummary.struct_class = Types::InferenceComponentRuntimeConfigSummary
7731
7744
 
7745
+ InferenceComponentSchedulingConfig.add_member(:placement_strategy, Shapes::ShapeRef.new(shape: InferenceComponentPlacementStrategy, required: true, location_name: "PlacementStrategy"))
7746
+ InferenceComponentSchedulingConfig.add_member(:availability_zone_balance, Shapes::ShapeRef.new(shape: InferenceComponentAvailabilityZoneBalance, location_name: "AvailabilityZoneBalance"))
7747
+ InferenceComponentSchedulingConfig.struct_class = Types::InferenceComponentSchedulingConfig
7748
+
7732
7749
  InferenceComponentSpecification.add_member(:model_name, Shapes::ShapeRef.new(shape: ModelName, location_name: "ModelName"))
7733
7750
  InferenceComponentSpecification.add_member(:container, Shapes::ShapeRef.new(shape: InferenceComponentContainerSpecification, location_name: "Container"))
7734
7751
  InferenceComponentSpecification.add_member(:startup_parameters, Shapes::ShapeRef.new(shape: InferenceComponentStartupParameters, location_name: "StartupParameters"))
7735
7752
  InferenceComponentSpecification.add_member(:compute_resource_requirements, Shapes::ShapeRef.new(shape: InferenceComponentComputeResourceRequirements, location_name: "ComputeResourceRequirements"))
7736
7753
  InferenceComponentSpecification.add_member(:base_inference_component_name, Shapes::ShapeRef.new(shape: InferenceComponentName, location_name: "BaseInferenceComponentName"))
7737
7754
  InferenceComponentSpecification.add_member(:data_cache_config, Shapes::ShapeRef.new(shape: InferenceComponentDataCacheConfig, location_name: "DataCacheConfig"))
7755
+ InferenceComponentSpecification.add_member(:scheduling_config, Shapes::ShapeRef.new(shape: InferenceComponentSchedulingConfig, location_name: "SchedulingConfig"))
7738
7756
  InferenceComponentSpecification.struct_class = Types::InferenceComponentSpecification
7739
7757
 
7740
7758
  InferenceComponentSpecificationSummary.add_member(:model_name, Shapes::ShapeRef.new(shape: ModelName, location_name: "ModelName"))
@@ -7743,6 +7761,7 @@ module Aws::SageMaker
7743
7761
  InferenceComponentSpecificationSummary.add_member(:compute_resource_requirements, Shapes::ShapeRef.new(shape: InferenceComponentComputeResourceRequirements, location_name: "ComputeResourceRequirements"))
7744
7762
  InferenceComponentSpecificationSummary.add_member(:base_inference_component_name, Shapes::ShapeRef.new(shape: InferenceComponentName, location_name: "BaseInferenceComponentName"))
7745
7763
  InferenceComponentSpecificationSummary.add_member(:data_cache_config, Shapes::ShapeRef.new(shape: InferenceComponentDataCacheConfigSummary, location_name: "DataCacheConfig"))
7764
+ InferenceComponentSpecificationSummary.add_member(:scheduling_config, Shapes::ShapeRef.new(shape: InferenceComponentSchedulingConfig, location_name: "SchedulingConfig"))
7746
7765
  InferenceComponentSpecificationSummary.struct_class = Types::InferenceComponentSpecificationSummary
7747
7766
 
7748
7767
  InferenceComponentStartupParameters.add_member(:model_data_download_timeout_in_seconds, Shapes::ShapeRef.new(shape: ProductionVariantModelDataDownloadTimeoutInSeconds, location_name: "ModelDataDownloadTimeoutInSeconds"))
@@ -10446,8 +10465,14 @@ module Aws::SageMaker
10446
10465
  ProductionVariantManagedInstanceScaling.add_member(:status, Shapes::ShapeRef.new(shape: ManagedInstanceScalingStatus, location_name: "Status"))
10447
10466
  ProductionVariantManagedInstanceScaling.add_member(:min_instance_count, Shapes::ShapeRef.new(shape: ManagedInstanceScalingMinInstanceCount, location_name: "MinInstanceCount"))
10448
10467
  ProductionVariantManagedInstanceScaling.add_member(:max_instance_count, Shapes::ShapeRef.new(shape: ManagedInstanceScalingMaxInstanceCount, location_name: "MaxInstanceCount"))
10468
+ ProductionVariantManagedInstanceScaling.add_member(:scale_in_policy, Shapes::ShapeRef.new(shape: ProductionVariantManagedInstanceScalingScaleInPolicy, location_name: "ScaleInPolicy"))
10449
10469
  ProductionVariantManagedInstanceScaling.struct_class = Types::ProductionVariantManagedInstanceScaling
10450
10470
 
10471
+ ProductionVariantManagedInstanceScalingScaleInPolicy.add_member(:strategy, Shapes::ShapeRef.new(shape: ManagedInstanceScalingScaleInStrategy, required: true, location_name: "Strategy"))
10472
+ ProductionVariantManagedInstanceScalingScaleInPolicy.add_member(:maximum_step_size, Shapes::ShapeRef.new(shape: ManagedInstanceScalingMaximumStepSize, location_name: "MaximumStepSize"))
10473
+ ProductionVariantManagedInstanceScalingScaleInPolicy.add_member(:cooldown_in_minutes, Shapes::ShapeRef.new(shape: ManagedInstanceScalingCooldownInMinutes, location_name: "CooldownInMinutes"))
10474
+ ProductionVariantManagedInstanceScalingScaleInPolicy.struct_class = Types::ProductionVariantManagedInstanceScalingScaleInPolicy
10475
+
10451
10476
  ProductionVariantRoutingConfig.add_member(:routing_strategy, Shapes::ShapeRef.new(shape: RoutingStrategy, required: true, location_name: "RoutingStrategy"))
10452
10477
  ProductionVariantRoutingConfig.struct_class = Types::ProductionVariantRoutingConfig
10453
10478
 
@@ -28799,6 +28799,40 @@ module Aws::SageMaker
28799
28799
  include Aws::Structure
28800
28800
  end
28801
28801
 
28802
+ # Configuration for balancing inference component copies across
28803
+ # Availability Zones.
28804
+ #
28805
+ # @!attribute [rw] enforcement_mode
28806
+ # Determines how strictly the Availability Zone balance constraint is
28807
+ # enforced.
28808
+ #
28809
+ # PERMISSIVE
28810
+ #
28811
+ # : The endpoint attempts to balance copies across Availability Zones
28812
+ # but proceeds with scheduling even if balance can't be achieved
28813
+ # due to available capacity or instance distribution across
28814
+ # Availability Zones.
28815
+ # @return [String]
28816
+ #
28817
+ # @!attribute [rw] max_imbalance
28818
+ # The maximum allowed difference in the number of inference component
28819
+ # copies between any two Availability Zones. This parameter applies
28820
+ # only when the endpoint has instances across two or more Availability
28821
+ # Zones. A copy placement is allowed if it reduces imbalance or the
28822
+ # resulting imbalance is within this value.
28823
+ #
28824
+ # Default value: `0`.
28825
+ # @return [Integer]
28826
+ #
28827
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InferenceComponentAvailabilityZoneBalance AWS API Documentation
28828
+ #
28829
+ class InferenceComponentAvailabilityZoneBalance < Struct.new(
28830
+ :enforcement_mode,
28831
+ :max_imbalance)
28832
+ SENSITIVE = []
28833
+ include Aws::Structure
28834
+ end
28835
+
28802
28836
  # Specifies the type and size of the endpoint capacity to activate for a
28803
28837
  # rolling deployment or a rollback strategy. You can specify your
28804
28838
  # batches as either of the following:
@@ -29096,6 +29130,40 @@ module Aws::SageMaker
29096
29130
  include Aws::Structure
29097
29131
  end
29098
29132
 
29133
+ # The scheduling configuration that determines how inference component
29134
+ # copies are placed across available instances when copies are added or
29135
+ # removed.
29136
+ #
29137
+ # @!attribute [rw] placement_strategy
29138
+ # The strategy for placing inference component copies across available
29139
+ # instances. If you also set `AvailabilityZoneBalance`, this strategy
29140
+ # applies to placement within each Availability Zone.
29141
+ #
29142
+ # SPREAD
29143
+ #
29144
+ # : Distributes copies evenly across available instances for better
29145
+ # resilience.
29146
+ #
29147
+ # BINPACK
29148
+ #
29149
+ # : Packs copies onto fewer instances to optimize resource
29150
+ # utilization.
29151
+ # @return [String]
29152
+ #
29153
+ # @!attribute [rw] availability_zone_balance
29154
+ # Configuration for balancing inference component copies across
29155
+ # Availability Zones.
29156
+ # @return [Types::InferenceComponentAvailabilityZoneBalance]
29157
+ #
29158
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InferenceComponentSchedulingConfig AWS API Documentation
29159
+ #
29160
+ class InferenceComponentSchedulingConfig < Struct.new(
29161
+ :placement_strategy,
29162
+ :availability_zone_balance)
29163
+ SENSITIVE = []
29164
+ include Aws::Structure
29165
+ end
29166
+
29099
29167
  # Details about the resources to deploy with this inference component,
29100
29168
  # including the model, container, and compute resources.
29101
29169
  #
@@ -29149,6 +29217,12 @@ module Aws::SageMaker
29149
29217
  # Settings that affect how the inference component caches data.
29150
29218
  # @return [Types::InferenceComponentDataCacheConfig]
29151
29219
  #
29220
+ # @!attribute [rw] scheduling_config
29221
+ # The scheduling configuration that determines how inference component
29222
+ # copies are placed across available instances when copies are added
29223
+ # or removed.
29224
+ # @return [Types::InferenceComponentSchedulingConfig]
29225
+ #
29152
29226
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InferenceComponentSpecification AWS API Documentation
29153
29227
  #
29154
29228
  class InferenceComponentSpecification < Struct.new(
@@ -29157,7 +29231,8 @@ module Aws::SageMaker
29157
29231
  :startup_parameters,
29158
29232
  :compute_resource_requirements,
29159
29233
  :base_inference_component_name,
29160
- :data_cache_config)
29234
+ :data_cache_config,
29235
+ :scheduling_config)
29161
29236
  SENSITIVE = []
29162
29237
  include Aws::Structure
29163
29238
  end
@@ -29193,6 +29268,12 @@ module Aws::SageMaker
29193
29268
  # Settings that affect how the inference component caches data.
29194
29269
  # @return [Types::InferenceComponentDataCacheConfigSummary]
29195
29270
  #
29271
+ # @!attribute [rw] scheduling_config
29272
+ # The scheduling configuration that determines how inference component
29273
+ # copies are placed across available instances when copies are added
29274
+ # or removed.
29275
+ # @return [Types::InferenceComponentSchedulingConfig]
29276
+ #
29196
29277
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/InferenceComponentSpecificationSummary AWS API Documentation
29197
29278
  #
29198
29279
  class InferenceComponentSpecificationSummary < Struct.new(
@@ -29201,7 +29282,8 @@ module Aws::SageMaker
29201
29282
  :startup_parameters,
29202
29283
  :compute_resource_requirements,
29203
29284
  :base_inference_component_name,
29204
- :data_cache_config)
29285
+ :data_cache_config,
29286
+ :scheduling_config)
29205
29287
  SENSITIVE = []
29206
29288
  include Aws::Structure
29207
29289
  end
@@ -43874,12 +43956,60 @@ module Aws::SageMaker
43874
43956
  # it scales up to accommodate an increase in traffic.
43875
43957
  # @return [Integer]
43876
43958
  #
43959
+ # @!attribute [rw] scale_in_policy
43960
+ # Configures the scale-in behavior for managed instance scaling.
43961
+ # @return [Types::ProductionVariantManagedInstanceScalingScaleInPolicy]
43962
+ #
43877
43963
  # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ProductionVariantManagedInstanceScaling AWS API Documentation
43878
43964
  #
43879
43965
  class ProductionVariantManagedInstanceScaling < Struct.new(
43880
43966
  :status,
43881
43967
  :min_instance_count,
43882
- :max_instance_count)
43968
+ :max_instance_count,
43969
+ :scale_in_policy)
43970
+ SENSITIVE = []
43971
+ include Aws::Structure
43972
+ end
43973
+
43974
+ # Configures the scale-in behavior for managed instance scaling.
43975
+ #
43976
+ # @!attribute [rw] strategy
43977
+ # The strategy for scaling in instances.
43978
+ #
43979
+ # IDLE\_RELEASE
43980
+ #
43981
+ # : Releases instances that have no hosted inference component copies.
43982
+ #
43983
+ # CONSOLIDATION
43984
+ #
43985
+ # : Consolidates inference component copies onto fewer instances to
43986
+ # release more instances. Consolidation honors the scheduling
43987
+ # configuration of each inference component. For example, if an
43988
+ # inference component specifies Availability Zone balance,
43989
+ # consolidation only proceeds when the resulting distribution does
43990
+ # not increase the imbalance.
43991
+ # @return [String]
43992
+ #
43993
+ # @!attribute [rw] maximum_step_size
43994
+ # The maximum number of instances that the endpoint can terminate at a
43995
+ # time during a consolidation scale-in operation.
43996
+ #
43997
+ # Default value: `1`.
43998
+ # @return [Integer]
43999
+ #
44000
+ # @!attribute [rw] cooldown_in_minutes
44001
+ # The cooldown period, in minutes, after the last endpoint operation
44002
+ # before the endpoint evaluates consolidation scale-in opportunities.
44003
+ #
44004
+ # Default value: `20`.
44005
+ # @return [Integer]
44006
+ #
44007
+ # @see http://docs.aws.amazon.com/goto/WebAPI/sagemaker-2017-07-24/ProductionVariantManagedInstanceScalingScaleInPolicy AWS API Documentation
44008
+ #
44009
+ class ProductionVariantManagedInstanceScalingScaleInPolicy < Struct.new(
44010
+ :strategy,
44011
+ :maximum_step_size,
44012
+ :cooldown_in_minutes)
43883
44013
  SENSITIVE = []
43884
44014
  include Aws::Structure
43885
44015
  end
@@ -55,7 +55,7 @@ module Aws::SageMaker
55
55
  autoload :EndpointProvider, 'aws-sdk-sagemaker/endpoint_provider'
56
56
  autoload :Endpoints, 'aws-sdk-sagemaker/endpoints'
57
57
 
58
- GEM_VERSION = '1.356.0'
58
+ GEM_VERSION = '1.357.0'
59
59
 
60
60
  end
61
61
 
data/sig/client.rbs CHANGED
@@ -1815,7 +1815,12 @@ module Aws
1815
1815
  managed_instance_scaling: {
1816
1816
  status: ("ENABLED" | "DISABLED")?,
1817
1817
  min_instance_count: ::Integer?,
1818
- max_instance_count: ::Integer?
1818
+ max_instance_count: ::Integer?,
1819
+ scale_in_policy: {
1820
+ strategy: ("IDLE_RELEASE" | "CONSOLIDATION"),
1821
+ maximum_step_size: ::Integer?,
1822
+ cooldown_in_minutes: ::Integer?
1823
+ }?
1819
1824
  }?,
1820
1825
  routing_config: {
1821
1826
  routing_strategy: ("LEAST_OUTSTANDING_REQUESTS" | "RANDOM")
@@ -1920,7 +1925,12 @@ module Aws
1920
1925
  managed_instance_scaling: {
1921
1926
  status: ("ENABLED" | "DISABLED")?,
1922
1927
  min_instance_count: ::Integer?,
1923
- max_instance_count: ::Integer?
1928
+ max_instance_count: ::Integer?,
1929
+ scale_in_policy: {
1930
+ strategy: ("IDLE_RELEASE" | "CONSOLIDATION"),
1931
+ maximum_step_size: ::Integer?,
1932
+ cooldown_in_minutes: ::Integer?
1933
+ }?
1924
1934
  }?,
1925
1935
  routing_config: {
1926
1936
  routing_strategy: ("LEAST_OUTSTANDING_REQUESTS" | "RANDOM")
@@ -2606,6 +2616,13 @@ module Aws
2606
2616
  base_inference_component_name: ::String?,
2607
2617
  data_cache_config: {
2608
2618
  enable_caching: bool
2619
+ }?,
2620
+ scheduling_config: {
2621
+ placement_strategy: ("SPREAD" | "BINPACK"),
2622
+ availability_zone_balance: {
2623
+ enforcement_mode: ("PERMISSIVE"),
2624
+ max_imbalance: ::Integer?
2625
+ }?
2609
2626
  }?
2610
2627
  },
2611
2628
  ?runtime_config: {
@@ -10156,6 +10173,13 @@ module Aws
10156
10173
  base_inference_component_name: ::String?,
10157
10174
  data_cache_config: {
10158
10175
  enable_caching: bool
10176
+ }?,
10177
+ scheduling_config: {
10178
+ placement_strategy: ("SPREAD" | "BINPACK"),
10179
+ availability_zone_balance: {
10180
+ enforcement_mode: ("PERMISSIVE"),
10181
+ max_imbalance: ::Integer?
10182
+ }?
10159
10183
  }?
10160
10184
  },
10161
10185
  ?runtime_config: {
data/sig/types.rbs CHANGED
@@ -5919,6 +5919,12 @@ module Aws::SageMaker
5919
5919
  SENSITIVE: []
5920
5920
  end
5921
5921
 
5922
+ class InferenceComponentAvailabilityZoneBalance
5923
+ attr_accessor enforcement_mode: ("PERMISSIVE")
5924
+ attr_accessor max_imbalance: ::Integer
5925
+ SENSITIVE: []
5926
+ end
5927
+
5922
5928
  class InferenceComponentCapacitySize
5923
5929
  attr_accessor type: ("COPY_COUNT" | "CAPACITY_PERCENT")
5924
5930
  attr_accessor value: ::Integer
@@ -5987,6 +5993,12 @@ module Aws::SageMaker
5987
5993
  SENSITIVE: []
5988
5994
  end
5989
5995
 
5996
+ class InferenceComponentSchedulingConfig
5997
+ attr_accessor placement_strategy: ("SPREAD" | "BINPACK")
5998
+ attr_accessor availability_zone_balance: Types::InferenceComponentAvailabilityZoneBalance
5999
+ SENSITIVE: []
6000
+ end
6001
+
5990
6002
  class InferenceComponentSpecification
5991
6003
  attr_accessor model_name: ::String
5992
6004
  attr_accessor container: Types::InferenceComponentContainerSpecification
@@ -5994,6 +6006,7 @@ module Aws::SageMaker
5994
6006
  attr_accessor compute_resource_requirements: Types::InferenceComponentComputeResourceRequirements
5995
6007
  attr_accessor base_inference_component_name: ::String
5996
6008
  attr_accessor data_cache_config: Types::InferenceComponentDataCacheConfig
6009
+ attr_accessor scheduling_config: Types::InferenceComponentSchedulingConfig
5997
6010
  SENSITIVE: []
5998
6011
  end
5999
6012
 
@@ -6004,6 +6017,7 @@ module Aws::SageMaker
6004
6017
  attr_accessor compute_resource_requirements: Types::InferenceComponentComputeResourceRequirements
6005
6018
  attr_accessor base_inference_component_name: ::String
6006
6019
  attr_accessor data_cache_config: Types::InferenceComponentDataCacheConfigSummary
6020
+ attr_accessor scheduling_config: Types::InferenceComponentSchedulingConfig
6007
6021
  SENSITIVE: []
6008
6022
  end
6009
6023
 
@@ -9312,6 +9326,14 @@ module Aws::SageMaker
9312
9326
  attr_accessor status: ("ENABLED" | "DISABLED")
9313
9327
  attr_accessor min_instance_count: ::Integer
9314
9328
  attr_accessor max_instance_count: ::Integer
9329
+ attr_accessor scale_in_policy: Types::ProductionVariantManagedInstanceScalingScaleInPolicy
9330
+ SENSITIVE: []
9331
+ end
9332
+
9333
+ class ProductionVariantManagedInstanceScalingScaleInPolicy
9334
+ attr_accessor strategy: ("IDLE_RELEASE" | "CONSOLIDATION")
9335
+ attr_accessor maximum_step_size: ::Integer
9336
+ attr_accessor cooldown_in_minutes: ::Integer
9315
9337
  SENSITIVE: []
9316
9338
  end
9317
9339
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-sagemaker
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.356.0
4
+ version: 1.357.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services