pulumi-aws 7.11.1__py3-none-any.whl → 7.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. pulumi_aws/__init__.py +57 -0
  2. pulumi_aws/acm/certificate.py +20 -20
  3. pulumi_aws/acmpca/certificate.py +8 -8
  4. pulumi_aws/apigateway/account.py +0 -4
  5. pulumi_aws/appflow/_inputs.py +33 -0
  6. pulumi_aws/appflow/outputs.py +22 -0
  7. pulumi_aws/appsync/graph_ql_api.py +84 -0
  8. pulumi_aws/bedrock/_inputs.py +97 -21
  9. pulumi_aws/bedrock/agent_agent_alias.py +94 -0
  10. pulumi_aws/bedrock/agentcore_agent_runtime.py +4 -4
  11. pulumi_aws/bedrock/agentcore_browser.py +42 -0
  12. pulumi_aws/bedrock/agentcore_gateway_target.py +217 -7
  13. pulumi_aws/bedrock/agentcore_memory.py +37 -9
  14. pulumi_aws/bedrock/agentcore_oauth2_credential_provider.py +38 -0
  15. pulumi_aws/bedrock/outputs.py +64 -17
  16. pulumi_aws/cloudfront/_inputs.py +15 -14
  17. pulumi_aws/cloudfront/distribution.py +28 -0
  18. pulumi_aws/cloudfront/outputs.py +10 -9
  19. pulumi_aws/cloudhsmv2/cluster.py +64 -0
  20. pulumi_aws/cloudwatch/_inputs.py +9 -8
  21. pulumi_aws/cloudwatch/contributor_managed_insight_rule.py +20 -0
  22. pulumi_aws/cloudwatch/log_delivery_destination.py +53 -12
  23. pulumi_aws/cloudwatch/outputs.py +6 -5
  24. pulumi_aws/codebuild/webhook.py +16 -16
  25. pulumi_aws/codepipeline/webhook.py +16 -16
  26. pulumi_aws/connect/_inputs.py +50 -4
  27. pulumi_aws/connect/outputs.py +95 -4
  28. pulumi_aws/connect/routing_profile.py +42 -18
  29. pulumi_aws/datasync/location_fsx_ontap_file_system.py +34 -0
  30. pulumi_aws/datazone/project.py +24 -0
  31. pulumi_aws/detective/organization_configuration.py +20 -0
  32. pulumi_aws/dms/_inputs.py +3 -3
  33. pulumi_aws/dms/outputs.py +2 -2
  34. pulumi_aws/ec2/__init__.py +1 -0
  35. pulumi_aws/ec2/_inputs.py +182 -0
  36. pulumi_aws/ec2/allowed_images_settings.py +338 -0
  37. pulumi_aws/ec2/get_coip_pools.py +24 -0
  38. pulumi_aws/ec2/image_block_public_access.py +48 -1
  39. pulumi_aws/ec2/outputs.py +167 -0
  40. pulumi_aws/ec2/security_group.py +6 -6
  41. pulumi_aws/ec2/serial_console_access.py +50 -3
  42. pulumi_aws/ec2/vpc_endpoint.py +92 -0
  43. pulumi_aws/ec2clientvpn/authorization_rule.py +7 -7
  44. pulumi_aws/ec2clientvpn/route.py +7 -7
  45. pulumi_aws/ec2transitgateway/instance_connect_endpoint.py +47 -0
  46. pulumi_aws/ecrpublic/get_images.py +24 -0
  47. pulumi_aws/ecs/_inputs.py +172 -33
  48. pulumi_aws/ecs/get_service.py +318 -7
  49. pulumi_aws/ecs/outputs.py +957 -86
  50. pulumi_aws/ecs/service.py +76 -0
  51. pulumi_aws/eks/_inputs.py +195 -5
  52. pulumi_aws/eks/outputs.py +164 -4
  53. pulumi_aws/elasticache/_inputs.py +154 -0
  54. pulumi_aws/elasticache/get_replication_group.py +23 -9
  55. pulumi_aws/elasticache/outputs.py +204 -0
  56. pulumi_aws/elasticache/replication_group.py +115 -0
  57. pulumi_aws/elasticache/reserved_cache_node.py +28 -0
  58. pulumi_aws/finspace/kx_cluster.py +76 -0
  59. pulumi_aws/fis/__init__.py +1 -0
  60. pulumi_aws/fis/target_account_configuration.py +401 -0
  61. pulumi_aws/glue/job.py +7 -7
  62. pulumi_aws/guardduty/malware_protection_plan.py +50 -0
  63. pulumi_aws/guardduty/member_detector_feature.py +42 -0
  64. pulumi_aws/invoicing/__init__.py +11 -0
  65. pulumi_aws/invoicing/_inputs.py +128 -0
  66. pulumi_aws/invoicing/invoice_unit.py +620 -0
  67. pulumi_aws/invoicing/outputs.py +99 -0
  68. pulumi_aws/iot/ca_certificate.py +32 -32
  69. pulumi_aws/iot/get_registration_code.py +8 -8
  70. pulumi_aws/ivschat/logging_configuration.py +28 -0
  71. pulumi_aws/kinesis/get_stream.py +15 -1
  72. pulumi_aws/kinesis/stream.py +47 -0
  73. pulumi_aws/kms/key.py +7 -7
  74. pulumi_aws/licensemanager/license_grant.py +36 -0
  75. pulumi_aws/m2/environment.py +150 -0
  76. pulumi_aws/networkfirewall/tls_inspection_configuration.py +84 -0
  77. pulumi_aws/networkflowmonitor/__init__.py +12 -0
  78. pulumi_aws/networkflowmonitor/_inputs.py +412 -0
  79. pulumi_aws/networkflowmonitor/monitor.py +568 -0
  80. pulumi_aws/networkflowmonitor/outputs.py +302 -0
  81. pulumi_aws/networkflowmonitor/scope.py +443 -0
  82. pulumi_aws/observabilityadmin/__init__.py +11 -0
  83. pulumi_aws/observabilityadmin/_inputs.py +506 -0
  84. pulumi_aws/observabilityadmin/centralization_rule_for_organization.py +637 -0
  85. pulumi_aws/observabilityadmin/outputs.py +415 -0
  86. pulumi_aws/opensearch/_inputs.py +92 -133
  87. pulumi_aws/opensearch/authorize_vpc_endpoint_access.py +4 -4
  88. pulumi_aws/opensearch/domain.py +60 -0
  89. pulumi_aws/opensearch/get_domain.py +16 -9
  90. pulumi_aws/opensearch/outputs.py +131 -2
  91. pulumi_aws/organizations/get_policies.py +2 -2
  92. pulumi_aws/organizations/get_policies_for_target.py +2 -2
  93. pulumi_aws/organizations/get_policy.py +1 -1
  94. pulumi_aws/organizations/organization.py +7 -7
  95. pulumi_aws/organizations/policy.py +35 -7
  96. pulumi_aws/pulumi-plugin.json +1 -1
  97. pulumi_aws/quicksight/analysis.py +108 -0
  98. pulumi_aws/quicksight/dashboard.py +110 -0
  99. pulumi_aws/quicksight/template.py +126 -0
  100. pulumi_aws/redshift/cluster_snapshot.py +28 -0
  101. pulumi_aws/redshift/get_cluster.py +52 -0
  102. pulumi_aws/redshift/snapshot_copy_grant.py +22 -0
  103. pulumi_aws/route53/profiles_association.py +30 -0
  104. pulumi_aws/route53/profiles_resource_association.py +34 -0
  105. pulumi_aws/s3control/multi_region_access_point_policy.py +76 -0
  106. pulumi_aws/s3tables/table.py +76 -1
  107. pulumi_aws/s3tables/table_bucket.py +78 -3
  108. pulumi_aws/sagemaker/_inputs.py +225 -232
  109. pulumi_aws/sagemaker/endpoint_configuration.py +111 -64
  110. pulumi_aws/sagemaker/outputs.py +154 -158
  111. pulumi_aws/ssmincidents/get_response_plan.py +14 -0
  112. pulumi_aws/ssoadmin/account_assignment.py +4 -4
  113. pulumi_aws/transfer/profile.py +20 -0
  114. pulumi_aws/transfer/server.py +7 -0
  115. pulumi_aws/transfer/ssh_key.py +6 -6
  116. pulumi_aws/wafv2/rule_group.py +440 -0
  117. pulumi_aws/workspacesweb/session_logger_association.py +80 -0
  118. pulumi_aws/workspacesweb/trust_store_association.py +24 -0
  119. {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/METADATA +1 -1
  120. {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/RECORD +122 -107
  121. {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/WHEEL +0 -0
  122. {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/top_level.txt +0 -0
@@ -7285,11 +7285,11 @@ if not MYPY:
7285
7285
  class EndpointConfigurationAsyncInferenceConfigArgsDict(TypedDict):
7286
7286
  output_config: pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgsDict']
7287
7287
  """
7288
- Specifies the configuration for asynchronous inference invocation outputs.
7288
+ Configuration for asynchronous inference invocation outputs.
7289
7289
  """
7290
7290
  client_config: NotRequired[pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgsDict']]
7291
7291
  """
7292
- Configures the behavior of the client used by Amazon SageMaker AI to interact with the model container during asynchronous inference.
7292
+ Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
7293
7293
  """
7294
7294
  elif False:
7295
7295
  EndpointConfigurationAsyncInferenceConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -7300,8 +7300,8 @@ class EndpointConfigurationAsyncInferenceConfigArgs:
7300
7300
  output_config: pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs'],
7301
7301
  client_config: Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs']] = None):
7302
7302
  """
7303
- :param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs'] output_config: Specifies the configuration for asynchronous inference invocation outputs.
7304
- :param pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs'] client_config: Configures the behavior of the client used by Amazon SageMaker AI to interact with the model container during asynchronous inference.
7303
+ :param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs'] output_config: Configuration for asynchronous inference invocation outputs.
7304
+ :param pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs'] client_config: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
7305
7305
  """
7306
7306
  pulumi.set(__self__, "output_config", output_config)
7307
7307
  if client_config is not None:
@@ -7311,7 +7311,7 @@ class EndpointConfigurationAsyncInferenceConfigArgs:
7311
7311
  @pulumi.getter(name="outputConfig")
7312
7312
  def output_config(self) -> pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs']:
7313
7313
  """
7314
- Specifies the configuration for asynchronous inference invocation outputs.
7314
+ Configuration for asynchronous inference invocation outputs.
7315
7315
  """
7316
7316
  return pulumi.get(self, "output_config")
7317
7317
 
@@ -7323,7 +7323,7 @@ class EndpointConfigurationAsyncInferenceConfigArgs:
7323
7323
  @pulumi.getter(name="clientConfig")
7324
7324
  def client_config(self) -> Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs']]:
7325
7325
  """
7326
- Configures the behavior of the client used by Amazon SageMaker AI to interact with the model container during asynchronous inference.
7326
+ Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
7327
7327
  """
7328
7328
  return pulumi.get(self, "client_config")
7329
7329
 
@@ -7336,7 +7336,7 @@ if not MYPY:
7336
7336
  class EndpointConfigurationAsyncInferenceConfigClientConfigArgsDict(TypedDict):
7337
7337
  max_concurrent_invocations_per_instance: NotRequired[pulumi.Input[_builtins.int]]
7338
7338
  """
7339
- The maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, Amazon SageMaker AI will choose an optimal value for you.
7339
+ Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
7340
7340
  """
7341
7341
  elif False:
7342
7342
  EndpointConfigurationAsyncInferenceConfigClientConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -7346,7 +7346,7 @@ class EndpointConfigurationAsyncInferenceConfigClientConfigArgs:
7346
7346
  def __init__(__self__, *,
7347
7347
  max_concurrent_invocations_per_instance: Optional[pulumi.Input[_builtins.int]] = None):
7348
7348
  """
7349
- :param pulumi.Input[_builtins.int] max_concurrent_invocations_per_instance: The maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, Amazon SageMaker AI will choose an optimal value for you.
7349
+ :param pulumi.Input[_builtins.int] max_concurrent_invocations_per_instance: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
7350
7350
  """
7351
7351
  if max_concurrent_invocations_per_instance is not None:
7352
7352
  pulumi.set(__self__, "max_concurrent_invocations_per_instance", max_concurrent_invocations_per_instance)
@@ -7355,7 +7355,7 @@ class EndpointConfigurationAsyncInferenceConfigClientConfigArgs:
7355
7355
  @pulumi.getter(name="maxConcurrentInvocationsPerInstance")
7356
7356
  def max_concurrent_invocations_per_instance(self) -> Optional[pulumi.Input[_builtins.int]]:
7357
7357
  """
7358
- The maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, Amazon SageMaker AI will choose an optimal value for you.
7358
+ Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
7359
7359
  """
7360
7360
  return pulumi.get(self, "max_concurrent_invocations_per_instance")
7361
7361
 
@@ -7368,19 +7368,19 @@ if not MYPY:
7368
7368
  class EndpointConfigurationAsyncInferenceConfigOutputConfigArgsDict(TypedDict):
7369
7369
  s3_output_path: pulumi.Input[_builtins.str]
7370
7370
  """
7371
- The Amazon S3 location to upload inference responses to.
7371
+ S3 location to upload inference responses to.
7372
7372
  """
7373
7373
  kms_key_id: NotRequired[pulumi.Input[_builtins.str]]
7374
7374
  """
7375
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker AI uses to encrypt the asynchronous inference output in Amazon S3.
7375
+ KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
7376
7376
  """
7377
7377
  notification_config: NotRequired[pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgsDict']]
7378
7378
  """
7379
- Specifies the configuration for notifications of inference results for asynchronous inference.
7379
+ Configuration for notifications of inference results for asynchronous inference.
7380
7380
  """
7381
7381
  s3_failure_path: NotRequired[pulumi.Input[_builtins.str]]
7382
7382
  """
7383
- The Amazon S3 location to upload failure inference responses to.
7383
+ S3 location to upload failure inference responses to.
7384
7384
  """
7385
7385
  elif False:
7386
7386
  EndpointConfigurationAsyncInferenceConfigOutputConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -7393,10 +7393,10 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
7393
7393
  notification_config: Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs']] = None,
7394
7394
  s3_failure_path: Optional[pulumi.Input[_builtins.str]] = None):
7395
7395
  """
7396
- :param pulumi.Input[_builtins.str] s3_output_path: The Amazon S3 location to upload inference responses to.
7397
- :param pulumi.Input[_builtins.str] kms_key_id: The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker AI uses to encrypt the asynchronous inference output in Amazon S3.
7398
- :param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs'] notification_config: Specifies the configuration for notifications of inference results for asynchronous inference.
7399
- :param pulumi.Input[_builtins.str] s3_failure_path: The Amazon S3 location to upload failure inference responses to.
7396
+ :param pulumi.Input[_builtins.str] s3_output_path: S3 location to upload inference responses to.
7397
+ :param pulumi.Input[_builtins.str] kms_key_id: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
7398
+ :param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs'] notification_config: Configuration for notifications of inference results for asynchronous inference.
7399
+ :param pulumi.Input[_builtins.str] s3_failure_path: S3 location to upload failure inference responses to.
7400
7400
  """
7401
7401
  pulumi.set(__self__, "s3_output_path", s3_output_path)
7402
7402
  if kms_key_id is not None:
@@ -7410,7 +7410,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
7410
7410
  @pulumi.getter(name="s3OutputPath")
7411
7411
  def s3_output_path(self) -> pulumi.Input[_builtins.str]:
7412
7412
  """
7413
- The Amazon S3 location to upload inference responses to.
7413
+ S3 location to upload inference responses to.
7414
7414
  """
7415
7415
  return pulumi.get(self, "s3_output_path")
7416
7416
 
@@ -7422,7 +7422,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
7422
7422
  @pulumi.getter(name="kmsKeyId")
7423
7423
  def kms_key_id(self) -> Optional[pulumi.Input[_builtins.str]]:
7424
7424
  """
7425
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker AI uses to encrypt the asynchronous inference output in Amazon S3.
7425
+ KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
7426
7426
  """
7427
7427
  return pulumi.get(self, "kms_key_id")
7428
7428
 
@@ -7434,7 +7434,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
7434
7434
  @pulumi.getter(name="notificationConfig")
7435
7435
  def notification_config(self) -> Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs']]:
7436
7436
  """
7437
- Specifies the configuration for notifications of inference results for asynchronous inference.
7437
+ Configuration for notifications of inference results for asynchronous inference.
7438
7438
  """
7439
7439
  return pulumi.get(self, "notification_config")
7440
7440
 
@@ -7446,7 +7446,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
7446
7446
  @pulumi.getter(name="s3FailurePath")
7447
7447
  def s3_failure_path(self) -> Optional[pulumi.Input[_builtins.str]]:
7448
7448
  """
7449
- The Amazon S3 location to upload failure inference responses to.
7449
+ S3 location to upload failure inference responses to.
7450
7450
  """
7451
7451
  return pulumi.get(self, "s3_failure_path")
7452
7452
 
@@ -7459,15 +7459,15 @@ if not MYPY:
7459
7459
  class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgsDict(TypedDict):
7460
7460
  error_topic: NotRequired[pulumi.Input[_builtins.str]]
7461
7461
  """
7462
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
7462
+ SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
7463
7463
  """
7464
7464
  include_inference_response_ins: NotRequired[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]
7465
7465
  """
7466
- The Amazon SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
7466
+ SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
7467
7467
  """
7468
7468
  success_topic: NotRequired[pulumi.Input[_builtins.str]]
7469
7469
  """
7470
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
7470
+ SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
7471
7471
  """
7472
7472
  elif False:
7473
7473
  EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -7479,9 +7479,9 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
7479
7479
  include_inference_response_ins: Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]] = None,
7480
7480
  success_topic: Optional[pulumi.Input[_builtins.str]] = None):
7481
7481
  """
7482
- :param pulumi.Input[_builtins.str] error_topic: Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
7483
- :param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] include_inference_response_ins: The Amazon SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
7484
- :param pulumi.Input[_builtins.str] success_topic: Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
7482
+ :param pulumi.Input[_builtins.str] error_topic: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
7483
+ :param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] include_inference_response_ins: SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
7484
+ :param pulumi.Input[_builtins.str] success_topic: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
7485
7485
  """
7486
7486
  if error_topic is not None:
7487
7487
  pulumi.set(__self__, "error_topic", error_topic)
@@ -7494,7 +7494,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
7494
7494
  @pulumi.getter(name="errorTopic")
7495
7495
  def error_topic(self) -> Optional[pulumi.Input[_builtins.str]]:
7496
7496
  """
7497
- Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
7497
+ SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
7498
7498
  """
7499
7499
  return pulumi.get(self, "error_topic")
7500
7500
 
@@ -7506,7 +7506,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
7506
7506
  @pulumi.getter(name="includeInferenceResponseIns")
7507
7507
  def include_inference_response_ins(self) -> Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]:
7508
7508
  """
7509
- The Amazon SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
7509
+ SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
7510
7510
  """
7511
7511
  return pulumi.get(self, "include_inference_response_ins")
7512
7512
 
@@ -7518,7 +7518,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
7518
7518
  @pulumi.getter(name="successTopic")
7519
7519
  def success_topic(self) -> Optional[pulumi.Input[_builtins.str]]:
7520
7520
  """
7521
- Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
7521
+ SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
7522
7522
  """
7523
7523
  return pulumi.get(self, "success_topic")
7524
7524
 
@@ -7531,11 +7531,11 @@ if not MYPY:
7531
7531
  class EndpointConfigurationDataCaptureConfigArgsDict(TypedDict):
7532
7532
  capture_options: pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgsDict']]]
7533
7533
  """
7534
- Specifies what data to capture. Fields are documented below.
7534
+ What data to capture. Fields are documented below.
7535
7535
  """
7536
7536
  destination_s3_uri: pulumi.Input[_builtins.str]
7537
7537
  """
7538
- The URL for S3 location where the captured data is stored.
7538
+ URL for S3 location where the captured data is stored.
7539
7539
  """
7540
7540
  initial_sampling_percentage: pulumi.Input[_builtins.int]
7541
7541
  """
@@ -7543,8 +7543,7 @@ if not MYPY:
7543
7543
  """
7544
7544
  capture_content_type_header: NotRequired[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgsDict']]
7545
7545
  """
7546
- The content type headers to capture.
7547
- See `capture_content_type_header` below.
7546
+ Content type headers to capture. See `capture_content_type_header` below.
7548
7547
  """
7549
7548
  enable_capture: NotRequired[pulumi.Input[_builtins.bool]]
7550
7549
  """
@@ -7552,7 +7551,7 @@ if not MYPY:
7552
7551
  """
7553
7552
  kms_key_id: NotRequired[pulumi.Input[_builtins.str]]
7554
7553
  """
7555
- Amazon Resource Name (ARN) of a AWS Key Management Service key that Amazon SageMaker AI uses to encrypt the captured data on Amazon S3.
7554
+ ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
7556
7555
  """
7557
7556
  elif False:
7558
7557
  EndpointConfigurationDataCaptureConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -7567,13 +7566,12 @@ class EndpointConfigurationDataCaptureConfigArgs:
7567
7566
  enable_capture: Optional[pulumi.Input[_builtins.bool]] = None,
7568
7567
  kms_key_id: Optional[pulumi.Input[_builtins.str]] = None):
7569
7568
  """
7570
- :param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgs']]] capture_options: Specifies what data to capture. Fields are documented below.
7571
- :param pulumi.Input[_builtins.str] destination_s3_uri: The URL for S3 location where the captured data is stored.
7569
+ :param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgs']]] capture_options: What data to capture. Fields are documented below.
7570
+ :param pulumi.Input[_builtins.str] destination_s3_uri: URL for S3 location where the captured data is stored.
7572
7571
  :param pulumi.Input[_builtins.int] initial_sampling_percentage: Portion of data to capture. Should be between 0 and 100.
7573
- :param pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs'] capture_content_type_header: The content type headers to capture.
7574
- See `capture_content_type_header` below.
7572
+ :param pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs'] capture_content_type_header: Content type headers to capture. See `capture_content_type_header` below.
7575
7573
  :param pulumi.Input[_builtins.bool] enable_capture: Flag to enable data capture. Defaults to `false`.
7576
- :param pulumi.Input[_builtins.str] kms_key_id: Amazon Resource Name (ARN) of a AWS Key Management Service key that Amazon SageMaker AI uses to encrypt the captured data on Amazon S3.
7574
+ :param pulumi.Input[_builtins.str] kms_key_id: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
7577
7575
  """
7578
7576
  pulumi.set(__self__, "capture_options", capture_options)
7579
7577
  pulumi.set(__self__, "destination_s3_uri", destination_s3_uri)
@@ -7589,7 +7587,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
7589
7587
  @pulumi.getter(name="captureOptions")
7590
7588
  def capture_options(self) -> pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgs']]]:
7591
7589
  """
7592
- Specifies what data to capture. Fields are documented below.
7590
+ What data to capture. Fields are documented below.
7593
7591
  """
7594
7592
  return pulumi.get(self, "capture_options")
7595
7593
 
@@ -7601,7 +7599,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
7601
7599
  @pulumi.getter(name="destinationS3Uri")
7602
7600
  def destination_s3_uri(self) -> pulumi.Input[_builtins.str]:
7603
7601
  """
7604
- The URL for S3 location where the captured data is stored.
7602
+ URL for S3 location where the captured data is stored.
7605
7603
  """
7606
7604
  return pulumi.get(self, "destination_s3_uri")
7607
7605
 
@@ -7625,8 +7623,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
7625
7623
  @pulumi.getter(name="captureContentTypeHeader")
7626
7624
  def capture_content_type_header(self) -> Optional[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs']]:
7627
7625
  """
7628
- The content type headers to capture.
7629
- See `capture_content_type_header` below.
7626
+ Content type headers to capture. See `capture_content_type_header` below.
7630
7627
  """
7631
7628
  return pulumi.get(self, "capture_content_type_header")
7632
7629
 
@@ -7650,7 +7647,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
7650
7647
  @pulumi.getter(name="kmsKeyId")
7651
7648
  def kms_key_id(self) -> Optional[pulumi.Input[_builtins.str]]:
7652
7649
  """
7653
- Amazon Resource Name (ARN) of a AWS Key Management Service key that Amazon SageMaker AI uses to encrypt the captured data on Amazon S3.
7650
+ ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
7654
7651
  """
7655
7652
  return pulumi.get(self, "kms_key_id")
7656
7653
 
@@ -7663,13 +7660,11 @@ if not MYPY:
7663
7660
  class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgsDict(TypedDict):
7664
7661
  csv_content_types: NotRequired[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]
7665
7662
  """
7666
- The CSV content type headers to capture.
7667
- One of `csv_content_types` or `json_content_types` is required.
7663
+ CSV content type headers to capture. One of `csv_content_types` or `json_content_types` is required.
7668
7664
  """
7669
7665
  json_content_types: NotRequired[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]
7670
7666
  """
7671
- The JSON content type headers to capture.
7672
- One of `json_content_types` or `csv_content_types` is required.
7667
+ The JSON content type headers to capture. One of `json_content_types` or `csv_content_types` is required.
7673
7668
  """
7674
7669
  elif False:
7675
7670
  EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgsDict: TypeAlias = Mapping[str, Any]
@@ -7680,10 +7675,8 @@ class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs:
7680
7675
  csv_content_types: Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]] = None,
7681
7676
  json_content_types: Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]] = None):
7682
7677
  """
7683
- :param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] csv_content_types: The CSV content type headers to capture.
7684
- One of `csv_content_types` or `json_content_types` is required.
7685
- :param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] json_content_types: The JSON content type headers to capture.
7686
- One of `json_content_types` or `csv_content_types` is required.
7678
+ :param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] csv_content_types: CSV content type headers to capture. One of `csv_content_types` or `json_content_types` is required.
7679
+ :param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] json_content_types: The JSON content type headers to capture. One of `json_content_types` or `csv_content_types` is required.
7687
7680
  """
7688
7681
  if csv_content_types is not None:
7689
7682
  pulumi.set(__self__, "csv_content_types", csv_content_types)
@@ -7694,8 +7687,7 @@ class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs:
7694
7687
  @pulumi.getter(name="csvContentTypes")
7695
7688
  def csv_content_types(self) -> Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]:
7696
7689
  """
7697
- The CSV content type headers to capture.
7698
- One of `csv_content_types` or `json_content_types` is required.
7690
+ CSV content type headers to capture. One of `csv_content_types` or `json_content_types` is required.
7699
7691
  """
7700
7692
  return pulumi.get(self, "csv_content_types")
7701
7693
 
@@ -7707,8 +7699,7 @@ class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs:
7707
7699
  @pulumi.getter(name="jsonContentTypes")
7708
7700
  def json_content_types(self) -> Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]:
7709
7701
  """
7710
- The JSON content type headers to capture.
7711
- One of `json_content_types` or `csv_content_types` is required.
7702
+ The JSON content type headers to capture. One of `json_content_types` or `csv_content_types` is required.
7712
7703
  """
7713
7704
  return pulumi.get(self, "json_content_types")
7714
7705
 
@@ -7721,7 +7712,7 @@ if not MYPY:
7721
7712
  class EndpointConfigurationDataCaptureConfigCaptureOptionArgsDict(TypedDict):
7722
7713
  capture_mode: pulumi.Input[_builtins.str]
7723
7714
  """
7724
- Specifies the data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
7715
+ Data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
7725
7716
  """
7726
7717
  elif False:
7727
7718
  EndpointConfigurationDataCaptureConfigCaptureOptionArgsDict: TypeAlias = Mapping[str, Any]
@@ -7731,7 +7722,7 @@ class EndpointConfigurationDataCaptureConfigCaptureOptionArgs:
7731
7722
  def __init__(__self__, *,
7732
7723
  capture_mode: pulumi.Input[_builtins.str]):
7733
7724
  """
7734
- :param pulumi.Input[_builtins.str] capture_mode: Specifies the data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
7725
+ :param pulumi.Input[_builtins.str] capture_mode: Data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
7735
7726
  """
7736
7727
  pulumi.set(__self__, "capture_mode", capture_mode)
7737
7728
 
@@ -7739,7 +7730,7 @@ class EndpointConfigurationDataCaptureConfigCaptureOptionArgs:
7739
7730
  @pulumi.getter(name="captureMode")
7740
7731
  def capture_mode(self) -> pulumi.Input[_builtins.str]:
7741
7732
  """
7742
- Specifies the data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
7733
+ Data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
7743
7734
  """
7744
7735
  return pulumi.get(self, "capture_mode")
7745
7736
 
@@ -7750,29 +7741,25 @@ class EndpointConfigurationDataCaptureConfigCaptureOptionArgs:
7750
7741
 
7751
7742
  if not MYPY:
7752
7743
  class EndpointConfigurationProductionVariantArgsDict(TypedDict):
7753
- model_name: pulumi.Input[_builtins.str]
7754
- """
7755
- The name of the model to use.
7756
- """
7757
7744
  accelerator_type: NotRequired[pulumi.Input[_builtins.str]]
7758
7745
  """
7759
- The size of the Elastic Inference (EI) instance to use for the production variant.
7746
+ Size of the Elastic Inference (EI) instance to use for the production variant.
7760
7747
  """
7761
7748
  container_startup_health_check_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
7762
7749
  """
7763
- The timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
7750
+ Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
7764
7751
  """
7765
7752
  core_dump_config: NotRequired[pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgsDict']]
7766
7753
  """
7767
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
7754
+ Core dump configuration from the model container when the process crashes. Fields are documented below.
7768
7755
  """
7769
7756
  enable_ssm_access: NotRequired[pulumi.Input[_builtins.bool]]
7770
7757
  """
7771
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoints.
7758
+ Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
7772
7759
  """
7773
7760
  inference_ami_version: NotRequired[pulumi.Input[_builtins.str]]
7774
7761
  """
7775
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
7762
+ Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
7776
7763
  """
7777
7764
  initial_instance_count: NotRequired[pulumi.Input[_builtins.int]]
7778
7765
  """
@@ -7780,35 +7767,39 @@ if not MYPY:
7780
7767
  """
7781
7768
  initial_variant_weight: NotRequired[pulumi.Input[_builtins.float]]
7782
7769
  """
7783
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to `1.0`.
7770
+ Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
7784
7771
  """
7785
7772
  instance_type: NotRequired[pulumi.Input[_builtins.str]]
7786
7773
  """
7787
- The type of instance to start.
7774
+ Type of instance to start.
7788
7775
  """
7789
7776
  managed_instance_scaling: NotRequired[pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgsDict']]
7790
7777
  """
7791
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
7778
+ Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
7792
7779
  """
7793
7780
  model_data_download_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
7794
7781
  """
7795
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
7782
+ Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
7783
+ """
7784
+ model_name: NotRequired[pulumi.Input[_builtins.str]]
7785
+ """
7786
+ Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
7796
7787
  """
7797
7788
  routing_configs: NotRequired[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgsDict']]]]
7798
7789
  """
7799
- Sets how the endpoint routes incoming traffic. See routing_config below.
7790
+ How the endpoint routes incoming traffic. See routing_config below.
7800
7791
  """
7801
7792
  serverless_config: NotRequired[pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgsDict']]
7802
7793
  """
7803
- Specifies configuration for how an endpoint performs asynchronous inference.
7794
+ How an endpoint performs asynchronous inference.
7804
7795
  """
7805
7796
  variant_name: NotRequired[pulumi.Input[_builtins.str]]
7806
7797
  """
7807
- The name of the variant. If omitted, this provider will assign a random, unique name.
7798
+ Name of the variant. If omitted, the provider will assign a random, unique name.
7808
7799
  """
7809
7800
  volume_size_in_gb: NotRequired[pulumi.Input[_builtins.int]]
7810
7801
  """
7811
- The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
7802
+ Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
7812
7803
  """
7813
7804
  elif False:
7814
7805
  EndpointConfigurationProductionVariantArgsDict: TypeAlias = Mapping[str, Any]
@@ -7816,7 +7807,6 @@ elif False:
7816
7807
  @pulumi.input_type
7817
7808
  class EndpointConfigurationProductionVariantArgs:
7818
7809
  def __init__(__self__, *,
7819
- model_name: pulumi.Input[_builtins.str],
7820
7810
  accelerator_type: Optional[pulumi.Input[_builtins.str]] = None,
7821
7811
  container_startup_health_check_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
7822
7812
  core_dump_config: Optional[pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs']] = None,
@@ -7827,28 +7817,28 @@ class EndpointConfigurationProductionVariantArgs:
7827
7817
  instance_type: Optional[pulumi.Input[_builtins.str]] = None,
7828
7818
  managed_instance_scaling: Optional[pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs']] = None,
7829
7819
  model_data_download_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
7820
+ model_name: Optional[pulumi.Input[_builtins.str]] = None,
7830
7821
  routing_configs: Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]]] = None,
7831
7822
  serverless_config: Optional[pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs']] = None,
7832
7823
  variant_name: Optional[pulumi.Input[_builtins.str]] = None,
7833
7824
  volume_size_in_gb: Optional[pulumi.Input[_builtins.int]] = None):
7834
7825
  """
7835
- :param pulumi.Input[_builtins.str] model_name: The name of the model to use.
7836
- :param pulumi.Input[_builtins.str] accelerator_type: The size of the Elastic Inference (EI) instance to use for the production variant.
7837
- :param pulumi.Input[_builtins.int] container_startup_health_check_timeout_in_seconds: The timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
7838
- :param pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs'] core_dump_config: Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
7839
- :param pulumi.Input[_builtins.bool] enable_ssm_access: You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoints.
7840
- :param pulumi.Input[_builtins.str] inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
7826
+ :param pulumi.Input[_builtins.str] accelerator_type: Size of the Elastic Inference (EI) instance to use for the production variant.
7827
+ :param pulumi.Input[_builtins.int] container_startup_health_check_timeout_in_seconds: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
7828
+ :param pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs'] core_dump_config: Core dump configuration from the model container when the process crashes. Fields are documented below.
7829
+ :param pulumi.Input[_builtins.bool] enable_ssm_access: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
7830
+ :param pulumi.Input[_builtins.str] inference_ami_version: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
7841
7831
  :param pulumi.Input[_builtins.int] initial_instance_count: Initial number of instances used for auto-scaling.
7842
- :param pulumi.Input[_builtins.float] initial_variant_weight: Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to `1.0`.
7843
- :param pulumi.Input[_builtins.str] instance_type: The type of instance to start.
7844
- :param pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling: Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
7845
- :param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds: The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
7846
- :param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]] routing_configs: Sets how the endpoint routes incoming traffic. See routing_config below.
7847
- :param pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs'] serverless_config: Specifies configuration for how an endpoint performs asynchronous inference.
7848
- :param pulumi.Input[_builtins.str] variant_name: The name of the variant. If omitted, this provider will assign a random, unique name.
7849
- :param pulumi.Input[_builtins.int] volume_size_in_gb: The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
7850
- """
7851
- pulumi.set(__self__, "model_name", model_name)
7832
+ :param pulumi.Input[_builtins.float] initial_variant_weight: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
7833
+ :param pulumi.Input[_builtins.str] instance_type: Type of instance to start.
7834
+ :param pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
7835
+ :param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
7836
+ :param pulumi.Input[_builtins.str] model_name: Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
7837
+ :param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]] routing_configs: How the endpoint routes incoming traffic. See routing_config below.
7838
+ :param pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs'] serverless_config: How an endpoint performs asynchronous inference.
7839
+ :param pulumi.Input[_builtins.str] variant_name: Name of the variant. If omitted, the provider will assign a random, unique name.
7840
+ :param pulumi.Input[_builtins.int] volume_size_in_gb: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
7841
+ """
7852
7842
  if accelerator_type is not None:
7853
7843
  pulumi.set(__self__, "accelerator_type", accelerator_type)
7854
7844
  if container_startup_health_check_timeout_in_seconds is not None:
@@ -7869,6 +7859,8 @@ class EndpointConfigurationProductionVariantArgs:
7869
7859
  pulumi.set(__self__, "managed_instance_scaling", managed_instance_scaling)
7870
7860
  if model_data_download_timeout_in_seconds is not None:
7871
7861
  pulumi.set(__self__, "model_data_download_timeout_in_seconds", model_data_download_timeout_in_seconds)
7862
+ if model_name is not None:
7863
+ pulumi.set(__self__, "model_name", model_name)
7872
7864
  if routing_configs is not None:
7873
7865
  pulumi.set(__self__, "routing_configs", routing_configs)
7874
7866
  if serverless_config is not None:
@@ -7878,23 +7870,11 @@ class EndpointConfigurationProductionVariantArgs:
7878
7870
  if volume_size_in_gb is not None:
7879
7871
  pulumi.set(__self__, "volume_size_in_gb", volume_size_in_gb)
7880
7872
 
7881
- @_builtins.property
7882
- @pulumi.getter(name="modelName")
7883
- def model_name(self) -> pulumi.Input[_builtins.str]:
7884
- """
7885
- The name of the model to use.
7886
- """
7887
- return pulumi.get(self, "model_name")
7888
-
7889
- @model_name.setter
7890
- def model_name(self, value: pulumi.Input[_builtins.str]):
7891
- pulumi.set(self, "model_name", value)
7892
-
7893
7873
  @_builtins.property
7894
7874
  @pulumi.getter(name="acceleratorType")
7895
7875
  def accelerator_type(self) -> Optional[pulumi.Input[_builtins.str]]:
7896
7876
  """
7897
- The size of the Elastic Inference (EI) instance to use for the production variant.
7877
+ Size of the Elastic Inference (EI) instance to use for the production variant.
7898
7878
  """
7899
7879
  return pulumi.get(self, "accelerator_type")
7900
7880
 
@@ -7906,7 +7886,7 @@ class EndpointConfigurationProductionVariantArgs:
7906
7886
  @pulumi.getter(name="containerStartupHealthCheckTimeoutInSeconds")
7907
7887
  def container_startup_health_check_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
7908
7888
  """
7909
- The timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
7889
+ Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
7910
7890
  """
7911
7891
  return pulumi.get(self, "container_startup_health_check_timeout_in_seconds")
7912
7892
 
@@ -7918,7 +7898,7 @@ class EndpointConfigurationProductionVariantArgs:
7918
7898
  @pulumi.getter(name="coreDumpConfig")
7919
7899
  def core_dump_config(self) -> Optional[pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs']]:
7920
7900
  """
7921
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
7901
+ Core dump configuration from the model container when the process crashes. Fields are documented below.
7922
7902
  """
7923
7903
  return pulumi.get(self, "core_dump_config")
7924
7904
 
@@ -7930,7 +7910,7 @@ class EndpointConfigurationProductionVariantArgs:
7930
7910
  @pulumi.getter(name="enableSsmAccess")
7931
7911
  def enable_ssm_access(self) -> Optional[pulumi.Input[_builtins.bool]]:
7932
7912
  """
7933
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoints.
7913
+ Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
7934
7914
  """
7935
7915
  return pulumi.get(self, "enable_ssm_access")
7936
7916
 
@@ -7942,7 +7922,7 @@ class EndpointConfigurationProductionVariantArgs:
7942
7922
  @pulumi.getter(name="inferenceAmiVersion")
7943
7923
  def inference_ami_version(self) -> Optional[pulumi.Input[_builtins.str]]:
7944
7924
  """
7945
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
7925
+ Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
7946
7926
  """
7947
7927
  return pulumi.get(self, "inference_ami_version")
7948
7928
 
@@ -7966,7 +7946,7 @@ class EndpointConfigurationProductionVariantArgs:
7966
7946
  @pulumi.getter(name="initialVariantWeight")
7967
7947
  def initial_variant_weight(self) -> Optional[pulumi.Input[_builtins.float]]:
7968
7948
  """
7969
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to `1.0`.
7949
+ Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
7970
7950
  """
7971
7951
  return pulumi.get(self, "initial_variant_weight")
7972
7952
 
@@ -7978,7 +7958,7 @@ class EndpointConfigurationProductionVariantArgs:
7978
7958
  @pulumi.getter(name="instanceType")
7979
7959
  def instance_type(self) -> Optional[pulumi.Input[_builtins.str]]:
7980
7960
  """
7981
- The type of instance to start.
7961
+ Type of instance to start.
7982
7962
  """
7983
7963
  return pulumi.get(self, "instance_type")
7984
7964
 
@@ -7990,7 +7970,7 @@ class EndpointConfigurationProductionVariantArgs:
7990
7970
  @pulumi.getter(name="managedInstanceScaling")
7991
7971
  def managed_instance_scaling(self) -> Optional[pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs']]:
7992
7972
  """
7993
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
7973
+ Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
7994
7974
  """
7995
7975
  return pulumi.get(self, "managed_instance_scaling")
7996
7976
 
@@ -8002,7 +7982,7 @@ class EndpointConfigurationProductionVariantArgs:
8002
7982
  @pulumi.getter(name="modelDataDownloadTimeoutInSeconds")
8003
7983
  def model_data_download_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
8004
7984
  """
8005
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
7985
+ Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8006
7986
  """
8007
7987
  return pulumi.get(self, "model_data_download_timeout_in_seconds")
8008
7988
 
@@ -8010,11 +7990,23 @@ class EndpointConfigurationProductionVariantArgs:
8010
7990
  def model_data_download_timeout_in_seconds(self, value: Optional[pulumi.Input[_builtins.int]]):
8011
7991
  pulumi.set(self, "model_data_download_timeout_in_seconds", value)
8012
7992
 
7993
+ @_builtins.property
7994
+ @pulumi.getter(name="modelName")
7995
+ def model_name(self) -> Optional[pulumi.Input[_builtins.str]]:
7996
+ """
7997
+ Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
7998
+ """
7999
+ return pulumi.get(self, "model_name")
8000
+
8001
+ @model_name.setter
8002
+ def model_name(self, value: Optional[pulumi.Input[_builtins.str]]):
8003
+ pulumi.set(self, "model_name", value)
8004
+
8013
8005
  @_builtins.property
8014
8006
  @pulumi.getter(name="routingConfigs")
8015
8007
  def routing_configs(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]]]:
8016
8008
  """
8017
- Sets how the endpoint routes incoming traffic. See routing_config below.
8009
+ How the endpoint routes incoming traffic. See routing_config below.
8018
8010
  """
8019
8011
  return pulumi.get(self, "routing_configs")
8020
8012
 
@@ -8026,7 +8018,7 @@ class EndpointConfigurationProductionVariantArgs:
8026
8018
  @pulumi.getter(name="serverlessConfig")
8027
8019
  def serverless_config(self) -> Optional[pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs']]:
8028
8020
  """
8029
- Specifies configuration for how an endpoint performs asynchronous inference.
8021
+ How an endpoint performs asynchronous inference.
8030
8022
  """
8031
8023
  return pulumi.get(self, "serverless_config")
8032
8024
 
@@ -8038,7 +8030,7 @@ class EndpointConfigurationProductionVariantArgs:
8038
8030
  @pulumi.getter(name="variantName")
8039
8031
  def variant_name(self) -> Optional[pulumi.Input[_builtins.str]]:
8040
8032
  """
8041
- The name of the variant. If omitted, this provider will assign a random, unique name.
8033
+ Name of the variant. If omitted, the provider will assign a random, unique name.
8042
8034
  """
8043
8035
  return pulumi.get(self, "variant_name")
8044
8036
 
@@ -8050,7 +8042,7 @@ class EndpointConfigurationProductionVariantArgs:
8050
8042
  @pulumi.getter(name="volumeSizeInGb")
8051
8043
  def volume_size_in_gb(self) -> Optional[pulumi.Input[_builtins.int]]:
8052
8044
  """
8053
- The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
8045
+ Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
8054
8046
  """
8055
8047
  return pulumi.get(self, "volume_size_in_gb")
8056
8048
 
@@ -8063,11 +8055,11 @@ if not MYPY:
8063
8055
  class EndpointConfigurationProductionVariantCoreDumpConfigArgsDict(TypedDict):
8064
8056
  destination_s3_uri: pulumi.Input[_builtins.str]
8065
8057
  """
8066
- The Amazon S3 bucket to send the core dump to.
8058
+ S3 bucket to send the core dump to.
8067
8059
  """
8068
8060
  kms_key_id: NotRequired[pulumi.Input[_builtins.str]]
8069
8061
  """
8070
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker AI uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
8062
+ KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
8071
8063
  """
8072
8064
  elif False:
8073
8065
  EndpointConfigurationProductionVariantCoreDumpConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -8078,8 +8070,8 @@ class EndpointConfigurationProductionVariantCoreDumpConfigArgs:
8078
8070
  destination_s3_uri: pulumi.Input[_builtins.str],
8079
8071
  kms_key_id: Optional[pulumi.Input[_builtins.str]] = None):
8080
8072
  """
8081
- :param pulumi.Input[_builtins.str] destination_s3_uri: The Amazon S3 bucket to send the core dump to.
8082
- :param pulumi.Input[_builtins.str] kms_key_id: The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker AI uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
8073
+ :param pulumi.Input[_builtins.str] destination_s3_uri: S3 bucket to send the core dump to.
8074
+ :param pulumi.Input[_builtins.str] kms_key_id: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
8083
8075
  """
8084
8076
  pulumi.set(__self__, "destination_s3_uri", destination_s3_uri)
8085
8077
  if kms_key_id is not None:
@@ -8089,7 +8081,7 @@ class EndpointConfigurationProductionVariantCoreDumpConfigArgs:
8089
8081
  @pulumi.getter(name="destinationS3Uri")
8090
8082
  def destination_s3_uri(self) -> pulumi.Input[_builtins.str]:
8091
8083
  """
8092
- The Amazon S3 bucket to send the core dump to.
8084
+ S3 bucket to send the core dump to.
8093
8085
  """
8094
8086
  return pulumi.get(self, "destination_s3_uri")
8095
8087
 
@@ -8101,7 +8093,7 @@ class EndpointConfigurationProductionVariantCoreDumpConfigArgs:
8101
8093
  @pulumi.getter(name="kmsKeyId")
8102
8094
  def kms_key_id(self) -> Optional[pulumi.Input[_builtins.str]]:
8103
8095
  """
8104
- The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker AI uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
8096
+ KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
8105
8097
  """
8106
8098
  return pulumi.get(self, "kms_key_id")
8107
8099
 
@@ -8114,15 +8106,15 @@ if not MYPY:
8114
8106
  class EndpointConfigurationProductionVariantManagedInstanceScalingArgsDict(TypedDict):
8115
8107
  max_instance_count: NotRequired[pulumi.Input[_builtins.int]]
8116
8108
  """
8117
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
8109
+ Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
8118
8110
  """
8119
8111
  min_instance_count: NotRequired[pulumi.Input[_builtins.int]]
8120
8112
  """
8121
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
8113
+ Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
8122
8114
  """
8123
8115
  status: NotRequired[pulumi.Input[_builtins.str]]
8124
8116
  """
8125
- Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
8117
+ Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
8126
8118
  """
8127
8119
  elif False:
8128
8120
  EndpointConfigurationProductionVariantManagedInstanceScalingArgsDict: TypeAlias = Mapping[str, Any]
@@ -8134,9 +8126,9 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
8134
8126
  min_instance_count: Optional[pulumi.Input[_builtins.int]] = None,
8135
8127
  status: Optional[pulumi.Input[_builtins.str]] = None):
8136
8128
  """
8137
- :param pulumi.Input[_builtins.int] max_instance_count: The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
8138
- :param pulumi.Input[_builtins.int] min_instance_count: The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
8139
- :param pulumi.Input[_builtins.str] status: Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
8129
+ :param pulumi.Input[_builtins.int] max_instance_count: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
8130
+ :param pulumi.Input[_builtins.int] min_instance_count: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
8131
+ :param pulumi.Input[_builtins.str] status: Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
8140
8132
  """
8141
8133
  if max_instance_count is not None:
8142
8134
  pulumi.set(__self__, "max_instance_count", max_instance_count)
@@ -8149,7 +8141,7 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
8149
8141
  @pulumi.getter(name="maxInstanceCount")
8150
8142
  def max_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
8151
8143
  """
8152
- The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
8144
+ Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
8153
8145
  """
8154
8146
  return pulumi.get(self, "max_instance_count")
8155
8147
 
@@ -8161,7 +8153,7 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
8161
8153
  @pulumi.getter(name="minInstanceCount")
8162
8154
  def min_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
8163
8155
  """
8164
- The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
8156
+ Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
8165
8157
  """
8166
8158
  return pulumi.get(self, "min_instance_count")
8167
8159
 
@@ -8173,7 +8165,7 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
8173
8165
  @pulumi.getter
8174
8166
  def status(self) -> Optional[pulumi.Input[_builtins.str]]:
8175
8167
  """
8176
- Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
8168
+ Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
8177
8169
  """
8178
8170
  return pulumi.get(self, "status")
8179
8171
 
@@ -8186,7 +8178,7 @@ if not MYPY:
8186
8178
  class EndpointConfigurationProductionVariantRoutingConfigArgsDict(TypedDict):
8187
8179
  routing_strategy: pulumi.Input[_builtins.str]
8188
8180
  """
8189
- Sets how the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
8181
+ How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
8190
8182
  """
8191
8183
  elif False:
8192
8184
  EndpointConfigurationProductionVariantRoutingConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -8196,7 +8188,7 @@ class EndpointConfigurationProductionVariantRoutingConfigArgs:
8196
8188
  def __init__(__self__, *,
8197
8189
  routing_strategy: pulumi.Input[_builtins.str]):
8198
8190
  """
8199
- :param pulumi.Input[_builtins.str] routing_strategy: Sets how the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
8191
+ :param pulumi.Input[_builtins.str] routing_strategy: How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
8200
8192
  """
8201
8193
  pulumi.set(__self__, "routing_strategy", routing_strategy)
8202
8194
 
@@ -8204,7 +8196,7 @@ class EndpointConfigurationProductionVariantRoutingConfigArgs:
8204
8196
  @pulumi.getter(name="routingStrategy")
8205
8197
  def routing_strategy(self) -> pulumi.Input[_builtins.str]:
8206
8198
  """
8207
- Sets how the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
8199
+ How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
8208
8200
  """
8209
8201
  return pulumi.get(self, "routing_strategy")
8210
8202
 
@@ -8217,15 +8209,15 @@ if not MYPY:
8217
8209
  class EndpointConfigurationProductionVariantServerlessConfigArgsDict(TypedDict):
8218
8210
  max_concurrency: pulumi.Input[_builtins.int]
8219
8211
  """
8220
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
8212
+ Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
8221
8213
  """
8222
8214
  memory_size_in_mb: pulumi.Input[_builtins.int]
8223
8215
  """
8224
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
8216
+ Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
8225
8217
  """
8226
8218
  provisioned_concurrency: NotRequired[pulumi.Input[_builtins.int]]
8227
8219
  """
8228
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
8220
+ Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
8229
8221
  """
8230
8222
  elif False:
8231
8223
  EndpointConfigurationProductionVariantServerlessConfigArgsDict: TypeAlias = Mapping[str, Any]
@@ -8237,9 +8229,9 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
8237
8229
  memory_size_in_mb: pulumi.Input[_builtins.int],
8238
8230
  provisioned_concurrency: Optional[pulumi.Input[_builtins.int]] = None):
8239
8231
  """
8240
- :param pulumi.Input[_builtins.int] max_concurrency: The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
8241
- :param pulumi.Input[_builtins.int] memory_size_in_mb: The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
8242
- :param pulumi.Input[_builtins.int] provisioned_concurrency: The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
8232
+ :param pulumi.Input[_builtins.int] max_concurrency: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
8233
+ :param pulumi.Input[_builtins.int] memory_size_in_mb: Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
8234
+ :param pulumi.Input[_builtins.int] provisioned_concurrency: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
8243
8235
  """
8244
8236
  pulumi.set(__self__, "max_concurrency", max_concurrency)
8245
8237
  pulumi.set(__self__, "memory_size_in_mb", memory_size_in_mb)
@@ -8250,7 +8242,7 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
8250
8242
  @pulumi.getter(name="maxConcurrency")
8251
8243
  def max_concurrency(self) -> pulumi.Input[_builtins.int]:
8252
8244
  """
8253
- The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
8245
+ Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
8254
8246
  """
8255
8247
  return pulumi.get(self, "max_concurrency")
8256
8248
 
@@ -8262,7 +8254,7 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
8262
8254
  @pulumi.getter(name="memorySizeInMb")
8263
8255
  def memory_size_in_mb(self) -> pulumi.Input[_builtins.int]:
8264
8256
  """
8265
- The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
8257
+ Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
8266
8258
  """
8267
8259
  return pulumi.get(self, "memory_size_in_mb")
8268
8260
 
@@ -8274,7 +8266,7 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
8274
8266
  @pulumi.getter(name="provisionedConcurrency")
8275
8267
  def provisioned_concurrency(self) -> Optional[pulumi.Input[_builtins.int]]:
8276
8268
  """
8277
- The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
8269
+ Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
8278
8270
  """
8279
8271
  return pulumi.get(self, "provisioned_concurrency")
8280
8272
 
@@ -8285,29 +8277,25 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
8285
8277
 
8286
8278
  if not MYPY:
8287
8279
  class EndpointConfigurationShadowProductionVariantArgsDict(TypedDict):
8288
- model_name: pulumi.Input[_builtins.str]
8289
- """
8290
- The name of the model to use.
8291
- """
8292
8280
  accelerator_type: NotRequired[pulumi.Input[_builtins.str]]
8293
8281
  """
8294
- The size of the Elastic Inference (EI) instance to use for the production variant.
8282
+ Size of the Elastic Inference (EI) instance to use for the production variant.
8295
8283
  """
8296
8284
  container_startup_health_check_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
8297
8285
  """
8298
- The timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
8286
+ Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
8299
8287
  """
8300
8288
  core_dump_config: NotRequired[pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgsDict']]
8301
8289
  """
8302
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
8290
+ Core dump configuration from the model container when the process crashes. Fields are documented below.
8303
8291
  """
8304
8292
  enable_ssm_access: NotRequired[pulumi.Input[_builtins.bool]]
8305
8293
  """
8306
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoints.
8294
+ Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
8307
8295
  """
8308
8296
  inference_ami_version: NotRequired[pulumi.Input[_builtins.str]]
8309
8297
  """
8310
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
8298
+ Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
8311
8299
  """
8312
8300
  initial_instance_count: NotRequired[pulumi.Input[_builtins.int]]
8313
8301
  """
@@ -8315,35 +8303,39 @@ if not MYPY:
8315
8303
  """
8316
8304
  initial_variant_weight: NotRequired[pulumi.Input[_builtins.float]]
8317
8305
  """
8318
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to `1.0`.
8306
+ Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
8319
8307
  """
8320
8308
  instance_type: NotRequired[pulumi.Input[_builtins.str]]
8321
8309
  """
8322
- The type of instance to start.
8310
+ Type of instance to start.
8323
8311
  """
8324
8312
  managed_instance_scaling: NotRequired[pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgsDict']]
8325
8313
  """
8326
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
8314
+ Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
8327
8315
  """
8328
8316
  model_data_download_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
8329
8317
  """
8330
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8318
+ Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8319
+ """
8320
+ model_name: NotRequired[pulumi.Input[_builtins.str]]
8321
+ """
8322
+ Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
8331
8323
  """
8332
8324
  routing_configs: NotRequired[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgsDict']]]]
8333
8325
  """
8334
- Sets how the endpoint routes incoming traffic. See routing_config below.
8326
+ How the endpoint routes incoming traffic. See routing_config below.
8335
8327
  """
8336
8328
  serverless_config: NotRequired[pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgsDict']]
8337
8329
  """
8338
- Specifies configuration for how an endpoint performs asynchronous inference.
8330
+ How an endpoint performs asynchronous inference.
8339
8331
  """
8340
8332
  variant_name: NotRequired[pulumi.Input[_builtins.str]]
8341
8333
  """
8342
- The name of the variant. If omitted, this provider will assign a random, unique name.
8334
+ Name of the variant. If omitted, the provider will assign a random, unique name.
8343
8335
  """
8344
8336
  volume_size_in_gb: NotRequired[pulumi.Input[_builtins.int]]
8345
8337
  """
8346
- The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
8338
+ Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
8347
8339
  """
8348
8340
  elif False:
8349
8341
  EndpointConfigurationShadowProductionVariantArgsDict: TypeAlias = Mapping[str, Any]
@@ -8351,7 +8343,6 @@ elif False:
8351
8343
  @pulumi.input_type
8352
8344
  class EndpointConfigurationShadowProductionVariantArgs:
8353
8345
  def __init__(__self__, *,
8354
- model_name: pulumi.Input[_builtins.str],
8355
8346
  accelerator_type: Optional[pulumi.Input[_builtins.str]] = None,
8356
8347
  container_startup_health_check_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
8357
8348
  core_dump_config: Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs']] = None,
@@ -8362,28 +8353,28 @@ class EndpointConfigurationShadowProductionVariantArgs:
8362
8353
  instance_type: Optional[pulumi.Input[_builtins.str]] = None,
8363
8354
  managed_instance_scaling: Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs']] = None,
8364
8355
  model_data_download_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
8356
+ model_name: Optional[pulumi.Input[_builtins.str]] = None,
8365
8357
  routing_configs: Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]]] = None,
8366
8358
  serverless_config: Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs']] = None,
8367
8359
  variant_name: Optional[pulumi.Input[_builtins.str]] = None,
8368
8360
  volume_size_in_gb: Optional[pulumi.Input[_builtins.int]] = None):
8369
8361
  """
8370
- :param pulumi.Input[_builtins.str] model_name: The name of the model to use.
8371
- :param pulumi.Input[_builtins.str] accelerator_type: The size of the Elastic Inference (EI) instance to use for the production variant.
8372
- :param pulumi.Input[_builtins.int] container_startup_health_check_timeout_in_seconds: The timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
8373
- :param pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs'] core_dump_config: Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
8374
- :param pulumi.Input[_builtins.bool] enable_ssm_access: You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoints.
8375
- :param pulumi.Input[_builtins.str] inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
8362
+ :param pulumi.Input[_builtins.str] accelerator_type: Size of the Elastic Inference (EI) instance to use for the production variant.
8363
+ :param pulumi.Input[_builtins.int] container_startup_health_check_timeout_in_seconds: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
8364
+ :param pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs'] core_dump_config: Core dump configuration from the model container when the process crashes. Fields are documented below.
8365
+ :param pulumi.Input[_builtins.bool] enable_ssm_access: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
8366
+ :param pulumi.Input[_builtins.str] inference_ami_version: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
8376
8367
  :param pulumi.Input[_builtins.int] initial_instance_count: Initial number of instances used for auto-scaling.
8377
- :param pulumi.Input[_builtins.float] initial_variant_weight: Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to `1.0`.
8378
- :param pulumi.Input[_builtins.str] instance_type: The type of instance to start.
8379
- :param pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling: Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
8380
- :param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds: The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8381
- :param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]] routing_configs: Sets how the endpoint routes incoming traffic. See routing_config below.
8382
- :param pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs'] serverless_config: Specifies configuration for how an endpoint performs asynchronous inference.
8383
- :param pulumi.Input[_builtins.str] variant_name: The name of the variant. If omitted, this provider will assign a random, unique name.
8384
- :param pulumi.Input[_builtins.int] volume_size_in_gb: The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
8385
- """
8386
- pulumi.set(__self__, "model_name", model_name)
8368
+ :param pulumi.Input[_builtins.float] initial_variant_weight: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
8369
+ :param pulumi.Input[_builtins.str] instance_type: Type of instance to start.
8370
+ :param pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
8371
+ :param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8372
+ :param pulumi.Input[_builtins.str] model_name: Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
8373
+ :param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]] routing_configs: How the endpoint routes incoming traffic. See routing_config below.
8374
+ :param pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs'] serverless_config: How an endpoint performs asynchronous inference.
8375
+ :param pulumi.Input[_builtins.str] variant_name: Name of the variant. If omitted, the provider will assign a random, unique name.
8376
+ :param pulumi.Input[_builtins.int] volume_size_in_gb: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
8377
+ """
8387
8378
  if accelerator_type is not None:
8388
8379
  pulumi.set(__self__, "accelerator_type", accelerator_type)
8389
8380
  if container_startup_health_check_timeout_in_seconds is not None:
@@ -8404,6 +8395,8 @@ class EndpointConfigurationShadowProductionVariantArgs:
8404
8395
  pulumi.set(__self__, "managed_instance_scaling", managed_instance_scaling)
8405
8396
  if model_data_download_timeout_in_seconds is not None:
8406
8397
  pulumi.set(__self__, "model_data_download_timeout_in_seconds", model_data_download_timeout_in_seconds)
8398
+ if model_name is not None:
8399
+ pulumi.set(__self__, "model_name", model_name)
8407
8400
  if routing_configs is not None:
8408
8401
  pulumi.set(__self__, "routing_configs", routing_configs)
8409
8402
  if serverless_config is not None:
@@ -8413,23 +8406,11 @@ class EndpointConfigurationShadowProductionVariantArgs:
8413
8406
  if volume_size_in_gb is not None:
8414
8407
  pulumi.set(__self__, "volume_size_in_gb", volume_size_in_gb)
8415
8408
 
8416
- @_builtins.property
8417
- @pulumi.getter(name="modelName")
8418
- def model_name(self) -> pulumi.Input[_builtins.str]:
8419
- """
8420
- The name of the model to use.
8421
- """
8422
- return pulumi.get(self, "model_name")
8423
-
8424
- @model_name.setter
8425
- def model_name(self, value: pulumi.Input[_builtins.str]):
8426
- pulumi.set(self, "model_name", value)
8427
-
8428
8409
  @_builtins.property
8429
8410
  @pulumi.getter(name="acceleratorType")
8430
8411
  def accelerator_type(self) -> Optional[pulumi.Input[_builtins.str]]:
8431
8412
  """
8432
- The size of the Elastic Inference (EI) instance to use for the production variant.
8413
+ Size of the Elastic Inference (EI) instance to use for the production variant.
8433
8414
  """
8434
8415
  return pulumi.get(self, "accelerator_type")
8435
8416
 
@@ -8441,7 +8422,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8441
8422
  @pulumi.getter(name="containerStartupHealthCheckTimeoutInSeconds")
8442
8423
  def container_startup_health_check_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
8443
8424
  """
8444
- The timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
8425
+ Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
8445
8426
  """
8446
8427
  return pulumi.get(self, "container_startup_health_check_timeout_in_seconds")
8447
8428
 
@@ -8453,7 +8434,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8453
8434
  @pulumi.getter(name="coreDumpConfig")
8454
8435
  def core_dump_config(self) -> Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs']]:
8455
8436
  """
8456
- Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
8437
+ Core dump configuration from the model container when the process crashes. Fields are documented below.
8457
8438
  """
8458
8439
  return pulumi.get(self, "core_dump_config")
8459
8440
 
@@ -8465,7 +8446,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8465
8446
  @pulumi.getter(name="enableSsmAccess")
8466
8447
  def enable_ssm_access(self) -> Optional[pulumi.Input[_builtins.bool]]:
8467
8448
  """
8468
- You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoints.
8449
+ Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
8469
8450
  """
8470
8451
  return pulumi.get(self, "enable_ssm_access")
8471
8452
 
@@ -8477,7 +8458,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8477
8458
  @pulumi.getter(name="inferenceAmiVersion")
8478
8459
  def inference_ami_version(self) -> Optional[pulumi.Input[_builtins.str]]:
8479
8460
  """
8480
- Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
8461
+ Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
8481
8462
  """
8482
8463
  return pulumi.get(self, "inference_ami_version")
8483
8464
 
@@ -8501,7 +8482,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8501
8482
  @pulumi.getter(name="initialVariantWeight")
8502
8483
  def initial_variant_weight(self) -> Optional[pulumi.Input[_builtins.float]]:
8503
8484
  """
8504
- Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to `1.0`.
8485
+ Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
8505
8486
  """
8506
8487
  return pulumi.get(self, "initial_variant_weight")
8507
8488
 
@@ -8513,7 +8494,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8513
8494
  @pulumi.getter(name="instanceType")
8514
8495
  def instance_type(self) -> Optional[pulumi.Input[_builtins.str]]:
8515
8496
  """
8516
- The type of instance to start.
8497
+ Type of instance to start.
8517
8498
  """
8518
8499
  return pulumi.get(self, "instance_type")
8519
8500
 
@@ -8525,7 +8506,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8525
8506
  @pulumi.getter(name="managedInstanceScaling")
8526
8507
  def managed_instance_scaling(self) -> Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs']]:
8527
8508
  """
8528
- Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
8509
+ Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
8529
8510
  """
8530
8511
  return pulumi.get(self, "managed_instance_scaling")
8531
8512
 
@@ -8537,7 +8518,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
8537
8518
  @pulumi.getter(name="modelDataDownloadTimeoutInSeconds")
8538
8519
  def model_data_download_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
8539
8520
  """
8540
- The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8521
+ Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
8541
8522
  """
8542
8523
  return pulumi.get(self, "model_data_download_timeout_in_seconds")
8543
8524
 
@@ -8545,11 +8526,23 @@ class EndpointConfigurationShadowProductionVariantArgs:
8545
8526
  def model_data_download_timeout_in_seconds(self, value: Optional[pulumi.Input[_builtins.int]]):
8546
8527
  pulumi.set(self, "model_data_download_timeout_in_seconds", value)
8547
8528
 
8529
+ @_builtins.property
8530
+ @pulumi.getter(name="modelName")
8531
+ def model_name(self) -> Optional[pulumi.Input[_builtins.str]]:
8532
+ """
8533
+ Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
8534
+ """
8535
+ return pulumi.get(self, "model_name")
8536
+
8537
+ @model_name.setter
8538
+ def model_name(self, value: Optional[pulumi.Input[_builtins.str]]):
8539
+ pulumi.set(self, "model_name", value)
8540
+
8548
8541
  @_builtins.property
8549
8542
  @pulumi.getter(name="routingConfigs")
8550
8543
  def routing_configs(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]]]:
8551
8544
  """
8552
- Sets how the endpoint routes incoming traffic. See routing_config below.
8545
+ How the endpoint routes incoming traffic. See routing_config below.
8553
8546
  """
8554
8547
  return pulumi.get(self, "routing_configs")
8555
8548
 
    @_builtins.property
    @pulumi.getter(name="serverlessConfig")
    def serverless_config(self) -> Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs']]:
        """
        How an endpoint performs asynchronous inference.
        """
        return pulumi.get(self, "serverless_config")
 
    @_builtins.property
    @pulumi.getter(name="variantName")
    def variant_name(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Name of the variant. If omitted, the provider will assign a random, unique name.
        """
        return pulumi.get(self, "variant_name")
 
    @_builtins.property
    @pulumi.getter(name="volumeSizeInGb")
    def volume_size_in_gb(self) -> Optional[pulumi.Input[_builtins.int]]:
        """
        Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
        """
        return pulumi.get(self, "volume_size_in_gb")
 
if not MYPY:
    class EndpointConfigurationShadowProductionVariantCoreDumpConfigArgsDict(TypedDict):
        destination_s3_uri: pulumi.Input[_builtins.str]
        """
        S3 bucket to send the core dump to.
        """
        kms_key_id: pulumi.Input[_builtins.str]
        """
        KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
        """
elif False:
    EndpointConfigurationShadowProductionVariantCoreDumpConfigArgsDict: TypeAlias = Mapping[str, Any]
    def __init__(__self__, *,
                 destination_s3_uri: pulumi.Input[_builtins.str],
                 kms_key_id: pulumi.Input[_builtins.str]):
        """
        :param pulumi.Input[_builtins.str] destination_s3_uri: S3 bucket to send the core dump to.
        :param pulumi.Input[_builtins.str] kms_key_id: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
        """
        pulumi.set(__self__, "destination_s3_uri", destination_s3_uri)
        pulumi.set(__self__, "kms_key_id", kms_key_id)
    @_builtins.property
    @pulumi.getter(name="destinationS3Uri")
    def destination_s3_uri(self) -> pulumi.Input[_builtins.str]:
        """
        S3 bucket to send the core dump to.
        """
        return pulumi.get(self, "destination_s3_uri")
 
    @_builtins.property
    @pulumi.getter(name="kmsKeyId")
    def kms_key_id(self) -> pulumi.Input[_builtins.str]:
        """
        KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
        """
        return pulumi.get(self, "kms_key_id")
 
if not MYPY:
    class EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgsDict(TypedDict):
        max_instance_count: NotRequired[pulumi.Input[_builtins.int]]
        """
        Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
        """
        min_instance_count: NotRequired[pulumi.Input[_builtins.int]]
        """
        Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
        """
        status: NotRequired[pulumi.Input[_builtins.str]]
        """
        Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
        """
elif False:
    EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgsDict: TypeAlias = Mapping[str, Any]
    def __init__(__self__, *,
                 max_instance_count: Optional[pulumi.Input[_builtins.int]] = None,
                 min_instance_count: Optional[pulumi.Input[_builtins.int]] = None,
                 status: Optional[pulumi.Input[_builtins.str]] = None):
        """
        :param pulumi.Input[_builtins.int] max_instance_count: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
        :param pulumi.Input[_builtins.int] min_instance_count: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
        :param pulumi.Input[_builtins.str] status: Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
        """
        if max_instance_count is not None:
            pulumi.set(__self__, "max_instance_count", max_instance_count)
        if min_instance_count is not None:
            pulumi.set(__self__, "min_instance_count", min_instance_count)
        if status is not None:
            pulumi.set(__self__, "status", status)
    @_builtins.property
    @pulumi.getter(name="maxInstanceCount")
    def max_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
        """
        Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
        """
        return pulumi.get(self, "max_instance_count")
 
    @_builtins.property
    @pulumi.getter(name="minInstanceCount")
    def min_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
        """
        Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
        """
        return pulumi.get(self, "min_instance_count")
 
    @_builtins.property
    @pulumi.getter
    def status(self) -> Optional[pulumi.Input[_builtins.str]]:
        """
        Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
        """
        return pulumi.get(self, "status")
 
if not MYPY:
    class EndpointConfigurationShadowProductionVariantRoutingConfigArgsDict(TypedDict):
        routing_strategy: pulumi.Input[_builtins.str]
        """
        How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
        """
elif False:
    EndpointConfigurationShadowProductionVariantRoutingConfigArgsDict: TypeAlias = Mapping[str, Any]
    def __init__(__self__, *,
                 routing_strategy: pulumi.Input[_builtins.str]):
        """
        :param pulumi.Input[_builtins.str] routing_strategy: How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
        """
        pulumi.set(__self__, "routing_strategy", routing_strategy)
 
    @_builtins.property
    @pulumi.getter(name="routingStrategy")
    def routing_strategy(self) -> pulumi.Input[_builtins.str]:
        """
        How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
        """
        return pulumi.get(self, "routing_strategy")
 
if not MYPY:
    class EndpointConfigurationShadowProductionVariantServerlessConfigArgsDict(TypedDict):
        max_concurrency: pulumi.Input[_builtins.int]
        """
        Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
        """
        memory_size_in_mb: pulumi.Input[_builtins.int]
        """
        Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
        """
        provisioned_concurrency: NotRequired[pulumi.Input[_builtins.int]]
        """
        Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
        """
elif False:
    EndpointConfigurationShadowProductionVariantServerlessConfigArgsDict: TypeAlias = Mapping[str, Any]
    def __init__(__self__, *,
                 max_concurrency: pulumi.Input[_builtins.int],
                 memory_size_in_mb: pulumi.Input[_builtins.int],
                 provisioned_concurrency: Optional[pulumi.Input[_builtins.int]] = None):
        """
        :param pulumi.Input[_builtins.int] max_concurrency: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
        :param pulumi.Input[_builtins.int] memory_size_in_mb: Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
        :param pulumi.Input[_builtins.int] provisioned_concurrency: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
        """
        pulumi.set(__self__, "max_concurrency", max_concurrency)
        pulumi.set(__self__, "memory_size_in_mb", memory_size_in_mb)
        if provisioned_concurrency is not None:
            pulumi.set(__self__, "provisioned_concurrency", provisioned_concurrency)
    @_builtins.property
    @pulumi.getter(name="maxConcurrency")
    def max_concurrency(self) -> pulumi.Input[_builtins.int]:
        """
        Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
        """
        return pulumi.get(self, "max_concurrency")
 
    @_builtins.property
    @pulumi.getter(name="memorySizeInMb")
    def memory_size_in_mb(self) -> pulumi.Input[_builtins.int]:
        """
        Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
        """
        return pulumi.get(self, "memory_size_in_mb")
 
    @_builtins.property
    @pulumi.getter(name="provisionedConcurrency")
    def provisioned_concurrency(self) -> Optional[pulumi.Input[_builtins.int]]:
        """
        Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
        """
        return pulumi.get(self, "provisioned_concurrency")
8814
8807