pulumi-aws 7.11.1__py3-none-any.whl → 7.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulumi_aws/__init__.py +57 -0
- pulumi_aws/acm/certificate.py +20 -20
- pulumi_aws/acmpca/certificate.py +8 -8
- pulumi_aws/apigateway/account.py +0 -4
- pulumi_aws/appflow/_inputs.py +33 -0
- pulumi_aws/appflow/outputs.py +22 -0
- pulumi_aws/appsync/graph_ql_api.py +84 -0
- pulumi_aws/bedrock/_inputs.py +97 -21
- pulumi_aws/bedrock/agent_agent_alias.py +94 -0
- pulumi_aws/bedrock/agentcore_agent_runtime.py +4 -4
- pulumi_aws/bedrock/agentcore_browser.py +42 -0
- pulumi_aws/bedrock/agentcore_gateway_target.py +217 -7
- pulumi_aws/bedrock/agentcore_memory.py +37 -9
- pulumi_aws/bedrock/agentcore_oauth2_credential_provider.py +38 -0
- pulumi_aws/bedrock/outputs.py +64 -17
- pulumi_aws/cloudfront/_inputs.py +15 -14
- pulumi_aws/cloudfront/distribution.py +28 -0
- pulumi_aws/cloudfront/outputs.py +10 -9
- pulumi_aws/cloudhsmv2/cluster.py +64 -0
- pulumi_aws/cloudwatch/_inputs.py +9 -8
- pulumi_aws/cloudwatch/contributor_managed_insight_rule.py +20 -0
- pulumi_aws/cloudwatch/log_delivery_destination.py +53 -12
- pulumi_aws/cloudwatch/outputs.py +6 -5
- pulumi_aws/codebuild/webhook.py +16 -16
- pulumi_aws/codepipeline/webhook.py +16 -16
- pulumi_aws/connect/_inputs.py +50 -4
- pulumi_aws/connect/outputs.py +95 -4
- pulumi_aws/connect/routing_profile.py +42 -18
- pulumi_aws/datasync/location_fsx_ontap_file_system.py +34 -0
- pulumi_aws/datazone/project.py +24 -0
- pulumi_aws/detective/organization_configuration.py +20 -0
- pulumi_aws/dms/_inputs.py +3 -3
- pulumi_aws/dms/outputs.py +2 -2
- pulumi_aws/ec2/__init__.py +1 -0
- pulumi_aws/ec2/_inputs.py +182 -0
- pulumi_aws/ec2/allowed_images_settings.py +338 -0
- pulumi_aws/ec2/get_coip_pools.py +24 -0
- pulumi_aws/ec2/image_block_public_access.py +48 -1
- pulumi_aws/ec2/outputs.py +167 -0
- pulumi_aws/ec2/security_group.py +6 -6
- pulumi_aws/ec2/serial_console_access.py +50 -3
- pulumi_aws/ec2/vpc_endpoint.py +92 -0
- pulumi_aws/ec2clientvpn/authorization_rule.py +7 -7
- pulumi_aws/ec2clientvpn/route.py +7 -7
- pulumi_aws/ec2transitgateway/instance_connect_endpoint.py +47 -0
- pulumi_aws/ecrpublic/get_images.py +24 -0
- pulumi_aws/ecs/_inputs.py +172 -33
- pulumi_aws/ecs/get_service.py +318 -7
- pulumi_aws/ecs/outputs.py +957 -86
- pulumi_aws/ecs/service.py +76 -0
- pulumi_aws/eks/_inputs.py +195 -5
- pulumi_aws/eks/outputs.py +164 -4
- pulumi_aws/elasticache/_inputs.py +154 -0
- pulumi_aws/elasticache/get_replication_group.py +23 -9
- pulumi_aws/elasticache/outputs.py +204 -0
- pulumi_aws/elasticache/replication_group.py +115 -0
- pulumi_aws/elasticache/reserved_cache_node.py +28 -0
- pulumi_aws/finspace/kx_cluster.py +76 -0
- pulumi_aws/fis/__init__.py +1 -0
- pulumi_aws/fis/target_account_configuration.py +401 -0
- pulumi_aws/glue/job.py +7 -7
- pulumi_aws/guardduty/malware_protection_plan.py +50 -0
- pulumi_aws/guardduty/member_detector_feature.py +42 -0
- pulumi_aws/invoicing/__init__.py +11 -0
- pulumi_aws/invoicing/_inputs.py +128 -0
- pulumi_aws/invoicing/invoice_unit.py +620 -0
- pulumi_aws/invoicing/outputs.py +99 -0
- pulumi_aws/iot/ca_certificate.py +32 -32
- pulumi_aws/iot/get_registration_code.py +8 -8
- pulumi_aws/ivschat/logging_configuration.py +28 -0
- pulumi_aws/kinesis/get_stream.py +15 -1
- pulumi_aws/kinesis/stream.py +47 -0
- pulumi_aws/kms/key.py +7 -7
- pulumi_aws/licensemanager/license_grant.py +36 -0
- pulumi_aws/m2/environment.py +150 -0
- pulumi_aws/networkfirewall/tls_inspection_configuration.py +84 -0
- pulumi_aws/networkflowmonitor/__init__.py +12 -0
- pulumi_aws/networkflowmonitor/_inputs.py +412 -0
- pulumi_aws/networkflowmonitor/monitor.py +568 -0
- pulumi_aws/networkflowmonitor/outputs.py +302 -0
- pulumi_aws/networkflowmonitor/scope.py +443 -0
- pulumi_aws/observabilityadmin/__init__.py +11 -0
- pulumi_aws/observabilityadmin/_inputs.py +506 -0
- pulumi_aws/observabilityadmin/centralization_rule_for_organization.py +637 -0
- pulumi_aws/observabilityadmin/outputs.py +415 -0
- pulumi_aws/opensearch/_inputs.py +92 -133
- pulumi_aws/opensearch/authorize_vpc_endpoint_access.py +4 -4
- pulumi_aws/opensearch/domain.py +60 -0
- pulumi_aws/opensearch/get_domain.py +16 -9
- pulumi_aws/opensearch/outputs.py +131 -2
- pulumi_aws/organizations/get_policies.py +2 -2
- pulumi_aws/organizations/get_policies_for_target.py +2 -2
- pulumi_aws/organizations/get_policy.py +1 -1
- pulumi_aws/organizations/organization.py +7 -7
- pulumi_aws/organizations/policy.py +35 -7
- pulumi_aws/pulumi-plugin.json +1 -1
- pulumi_aws/quicksight/analysis.py +108 -0
- pulumi_aws/quicksight/dashboard.py +110 -0
- pulumi_aws/quicksight/template.py +126 -0
- pulumi_aws/redshift/cluster_snapshot.py +28 -0
- pulumi_aws/redshift/get_cluster.py +52 -0
- pulumi_aws/redshift/snapshot_copy_grant.py +22 -0
- pulumi_aws/route53/profiles_association.py +30 -0
- pulumi_aws/route53/profiles_resource_association.py +34 -0
- pulumi_aws/s3control/multi_region_access_point_policy.py +76 -0
- pulumi_aws/s3tables/table.py +76 -1
- pulumi_aws/s3tables/table_bucket.py +78 -3
- pulumi_aws/sagemaker/_inputs.py +225 -232
- pulumi_aws/sagemaker/endpoint_configuration.py +111 -64
- pulumi_aws/sagemaker/outputs.py +154 -158
- pulumi_aws/ssmincidents/get_response_plan.py +14 -0
- pulumi_aws/ssoadmin/account_assignment.py +4 -4
- pulumi_aws/transfer/profile.py +20 -0
- pulumi_aws/transfer/server.py +7 -0
- pulumi_aws/transfer/ssh_key.py +6 -6
- pulumi_aws/wafv2/rule_group.py +440 -0
- pulumi_aws/workspacesweb/session_logger_association.py +80 -0
- pulumi_aws/workspacesweb/trust_store_association.py +24 -0
- {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/METADATA +1 -1
- {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/RECORD +122 -107
- {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/WHEEL +0 -0
- {pulumi_aws-7.11.1.dist-info → pulumi_aws-7.12.0.dist-info}/top_level.txt +0 -0
pulumi_aws/sagemaker/_inputs.py
CHANGED
|
@@ -7285,11 +7285,11 @@ if not MYPY:
|
|
|
7285
7285
|
class EndpointConfigurationAsyncInferenceConfigArgsDict(TypedDict):
|
|
7286
7286
|
output_config: pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgsDict']
|
|
7287
7287
|
"""
|
|
7288
|
-
|
|
7288
|
+
Configuration for asynchronous inference invocation outputs.
|
|
7289
7289
|
"""
|
|
7290
7290
|
client_config: NotRequired[pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgsDict']]
|
|
7291
7291
|
"""
|
|
7292
|
-
Configures the behavior of the client used by
|
|
7292
|
+
Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
|
|
7293
7293
|
"""
|
|
7294
7294
|
elif False:
|
|
7295
7295
|
EndpointConfigurationAsyncInferenceConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7300,8 +7300,8 @@ class EndpointConfigurationAsyncInferenceConfigArgs:
|
|
|
7300
7300
|
output_config: pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs'],
|
|
7301
7301
|
client_config: Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs']] = None):
|
|
7302
7302
|
"""
|
|
7303
|
-
:param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs'] output_config:
|
|
7304
|
-
:param pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs'] client_config: Configures the behavior of the client used by
|
|
7303
|
+
:param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs'] output_config: Configuration for asynchronous inference invocation outputs.
|
|
7304
|
+
:param pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs'] client_config: Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
|
|
7305
7305
|
"""
|
|
7306
7306
|
pulumi.set(__self__, "output_config", output_config)
|
|
7307
7307
|
if client_config is not None:
|
|
@@ -7311,7 +7311,7 @@ class EndpointConfigurationAsyncInferenceConfigArgs:
|
|
|
7311
7311
|
@pulumi.getter(name="outputConfig")
|
|
7312
7312
|
def output_config(self) -> pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigArgs']:
|
|
7313
7313
|
"""
|
|
7314
|
-
|
|
7314
|
+
Configuration for asynchronous inference invocation outputs.
|
|
7315
7315
|
"""
|
|
7316
7316
|
return pulumi.get(self, "output_config")
|
|
7317
7317
|
|
|
@@ -7323,7 +7323,7 @@ class EndpointConfigurationAsyncInferenceConfigArgs:
|
|
|
7323
7323
|
@pulumi.getter(name="clientConfig")
|
|
7324
7324
|
def client_config(self) -> Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigClientConfigArgs']]:
|
|
7325
7325
|
"""
|
|
7326
|
-
Configures the behavior of the client used by
|
|
7326
|
+
Configures the behavior of the client used by SageMaker AI to interact with the model container during asynchronous inference.
|
|
7327
7327
|
"""
|
|
7328
7328
|
return pulumi.get(self, "client_config")
|
|
7329
7329
|
|
|
@@ -7336,7 +7336,7 @@ if not MYPY:
|
|
|
7336
7336
|
class EndpointConfigurationAsyncInferenceConfigClientConfigArgsDict(TypedDict):
|
|
7337
7337
|
max_concurrent_invocations_per_instance: NotRequired[pulumi.Input[_builtins.int]]
|
|
7338
7338
|
"""
|
|
7339
|
-
|
|
7339
|
+
Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
|
|
7340
7340
|
"""
|
|
7341
7341
|
elif False:
|
|
7342
7342
|
EndpointConfigurationAsyncInferenceConfigClientConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7346,7 +7346,7 @@ class EndpointConfigurationAsyncInferenceConfigClientConfigArgs:
|
|
|
7346
7346
|
def __init__(__self__, *,
|
|
7347
7347
|
max_concurrent_invocations_per_instance: Optional[pulumi.Input[_builtins.int]] = None):
|
|
7348
7348
|
"""
|
|
7349
|
-
:param pulumi.Input[_builtins.int] max_concurrent_invocations_per_instance:
|
|
7349
|
+
:param pulumi.Input[_builtins.int] max_concurrent_invocations_per_instance: Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
|
|
7350
7350
|
"""
|
|
7351
7351
|
if max_concurrent_invocations_per_instance is not None:
|
|
7352
7352
|
pulumi.set(__self__, "max_concurrent_invocations_per_instance", max_concurrent_invocations_per_instance)
|
|
@@ -7355,7 +7355,7 @@ class EndpointConfigurationAsyncInferenceConfigClientConfigArgs:
|
|
|
7355
7355
|
@pulumi.getter(name="maxConcurrentInvocationsPerInstance")
|
|
7356
7356
|
def max_concurrent_invocations_per_instance(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
7357
7357
|
"""
|
|
7358
|
-
|
|
7358
|
+
Maximum number of concurrent requests sent by the SageMaker AI client to the model container. If no value is provided, SageMaker AI will choose an optimal value for you.
|
|
7359
7359
|
"""
|
|
7360
7360
|
return pulumi.get(self, "max_concurrent_invocations_per_instance")
|
|
7361
7361
|
|
|
@@ -7368,19 +7368,19 @@ if not MYPY:
|
|
|
7368
7368
|
class EndpointConfigurationAsyncInferenceConfigOutputConfigArgsDict(TypedDict):
|
|
7369
7369
|
s3_output_path: pulumi.Input[_builtins.str]
|
|
7370
7370
|
"""
|
|
7371
|
-
|
|
7371
|
+
S3 location to upload inference responses to.
|
|
7372
7372
|
"""
|
|
7373
7373
|
kms_key_id: NotRequired[pulumi.Input[_builtins.str]]
|
|
7374
7374
|
"""
|
|
7375
|
-
|
|
7375
|
+
KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
|
|
7376
7376
|
"""
|
|
7377
7377
|
notification_config: NotRequired[pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgsDict']]
|
|
7378
7378
|
"""
|
|
7379
|
-
|
|
7379
|
+
Configuration for notifications of inference results for asynchronous inference.
|
|
7380
7380
|
"""
|
|
7381
7381
|
s3_failure_path: NotRequired[pulumi.Input[_builtins.str]]
|
|
7382
7382
|
"""
|
|
7383
|
-
|
|
7383
|
+
S3 location to upload failure inference responses to.
|
|
7384
7384
|
"""
|
|
7385
7385
|
elif False:
|
|
7386
7386
|
EndpointConfigurationAsyncInferenceConfigOutputConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7393,10 +7393,10 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
|
|
|
7393
7393
|
notification_config: Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs']] = None,
|
|
7394
7394
|
s3_failure_path: Optional[pulumi.Input[_builtins.str]] = None):
|
|
7395
7395
|
"""
|
|
7396
|
-
:param pulumi.Input[_builtins.str] s3_output_path:
|
|
7397
|
-
:param pulumi.Input[_builtins.str] kms_key_id:
|
|
7398
|
-
:param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs'] notification_config:
|
|
7399
|
-
:param pulumi.Input[_builtins.str] s3_failure_path:
|
|
7396
|
+
:param pulumi.Input[_builtins.str] s3_output_path: S3 location to upload inference responses to.
|
|
7397
|
+
:param pulumi.Input[_builtins.str] kms_key_id: KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
|
|
7398
|
+
:param pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs'] notification_config: Configuration for notifications of inference results for asynchronous inference.
|
|
7399
|
+
:param pulumi.Input[_builtins.str] s3_failure_path: S3 location to upload failure inference responses to.
|
|
7400
7400
|
"""
|
|
7401
7401
|
pulumi.set(__self__, "s3_output_path", s3_output_path)
|
|
7402
7402
|
if kms_key_id is not None:
|
|
@@ -7410,7 +7410,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
|
|
|
7410
7410
|
@pulumi.getter(name="s3OutputPath")
|
|
7411
7411
|
def s3_output_path(self) -> pulumi.Input[_builtins.str]:
|
|
7412
7412
|
"""
|
|
7413
|
-
|
|
7413
|
+
S3 location to upload inference responses to.
|
|
7414
7414
|
"""
|
|
7415
7415
|
return pulumi.get(self, "s3_output_path")
|
|
7416
7416
|
|
|
@@ -7422,7 +7422,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
|
|
|
7422
7422
|
@pulumi.getter(name="kmsKeyId")
|
|
7423
7423
|
def kms_key_id(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7424
7424
|
"""
|
|
7425
|
-
|
|
7425
|
+
KMS key that SageMaker AI uses to encrypt the asynchronous inference output in S3.
|
|
7426
7426
|
"""
|
|
7427
7427
|
return pulumi.get(self, "kms_key_id")
|
|
7428
7428
|
|
|
@@ -7434,7 +7434,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
|
|
|
7434
7434
|
@pulumi.getter(name="notificationConfig")
|
|
7435
7435
|
def notification_config(self) -> Optional[pulumi.Input['EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs']]:
|
|
7436
7436
|
"""
|
|
7437
|
-
|
|
7437
|
+
Configuration for notifications of inference results for asynchronous inference.
|
|
7438
7438
|
"""
|
|
7439
7439
|
return pulumi.get(self, "notification_config")
|
|
7440
7440
|
|
|
@@ -7446,7 +7446,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigArgs:
|
|
|
7446
7446
|
@pulumi.getter(name="s3FailurePath")
|
|
7447
7447
|
def s3_failure_path(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7448
7448
|
"""
|
|
7449
|
-
|
|
7449
|
+
S3 location to upload failure inference responses to.
|
|
7450
7450
|
"""
|
|
7451
7451
|
return pulumi.get(self, "s3_failure_path")
|
|
7452
7452
|
|
|
@@ -7459,15 +7459,15 @@ if not MYPY:
|
|
|
7459
7459
|
class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgsDict(TypedDict):
|
|
7460
7460
|
error_topic: NotRequired[pulumi.Input[_builtins.str]]
|
|
7461
7461
|
"""
|
|
7462
|
-
|
|
7462
|
+
SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
|
|
7463
7463
|
"""
|
|
7464
7464
|
include_inference_response_ins: NotRequired[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]
|
|
7465
7465
|
"""
|
|
7466
|
-
|
|
7466
|
+
SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
|
|
7467
7467
|
"""
|
|
7468
7468
|
success_topic: NotRequired[pulumi.Input[_builtins.str]]
|
|
7469
7469
|
"""
|
|
7470
|
-
|
|
7470
|
+
SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
|
|
7471
7471
|
"""
|
|
7472
7472
|
elif False:
|
|
7473
7473
|
EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7479,9 +7479,9 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
|
|
|
7479
7479
|
include_inference_response_ins: Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]] = None,
|
|
7480
7480
|
success_topic: Optional[pulumi.Input[_builtins.str]] = None):
|
|
7481
7481
|
"""
|
|
7482
|
-
:param pulumi.Input[_builtins.str] error_topic:
|
|
7483
|
-
:param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] include_inference_response_ins:
|
|
7484
|
-
:param pulumi.Input[_builtins.str] success_topic:
|
|
7482
|
+
:param pulumi.Input[_builtins.str] error_topic: SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
|
|
7483
|
+
:param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] include_inference_response_ins: SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
|
|
7484
|
+
:param pulumi.Input[_builtins.str] success_topic: SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
|
|
7485
7485
|
"""
|
|
7486
7486
|
if error_topic is not None:
|
|
7487
7487
|
pulumi.set(__self__, "error_topic", error_topic)
|
|
@@ -7494,7 +7494,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
|
|
|
7494
7494
|
@pulumi.getter(name="errorTopic")
|
|
7495
7495
|
def error_topic(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7496
7496
|
"""
|
|
7497
|
-
|
|
7497
|
+
SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
|
|
7498
7498
|
"""
|
|
7499
7499
|
return pulumi.get(self, "error_topic")
|
|
7500
7500
|
|
|
@@ -7506,7 +7506,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
|
|
|
7506
7506
|
@pulumi.getter(name="includeInferenceResponseIns")
|
|
7507
7507
|
def include_inference_response_ins(self) -> Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]:
|
|
7508
7508
|
"""
|
|
7509
|
-
|
|
7509
|
+
SNS topics where you want the inference response to be included. Valid values are `SUCCESS_NOTIFICATION_TOPIC` and `ERROR_NOTIFICATION_TOPIC`.
|
|
7510
7510
|
"""
|
|
7511
7511
|
return pulumi.get(self, "include_inference_response_ins")
|
|
7512
7512
|
|
|
@@ -7518,7 +7518,7 @@ class EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArg
|
|
|
7518
7518
|
@pulumi.getter(name="successTopic")
|
|
7519
7519
|
def success_topic(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7520
7520
|
"""
|
|
7521
|
-
|
|
7521
|
+
SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
|
|
7522
7522
|
"""
|
|
7523
7523
|
return pulumi.get(self, "success_topic")
|
|
7524
7524
|
|
|
@@ -7531,11 +7531,11 @@ if not MYPY:
|
|
|
7531
7531
|
class EndpointConfigurationDataCaptureConfigArgsDict(TypedDict):
|
|
7532
7532
|
capture_options: pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgsDict']]]
|
|
7533
7533
|
"""
|
|
7534
|
-
|
|
7534
|
+
What data to capture. Fields are documented below.
|
|
7535
7535
|
"""
|
|
7536
7536
|
destination_s3_uri: pulumi.Input[_builtins.str]
|
|
7537
7537
|
"""
|
|
7538
|
-
|
|
7538
|
+
URL for S3 location where the captured data is stored.
|
|
7539
7539
|
"""
|
|
7540
7540
|
initial_sampling_percentage: pulumi.Input[_builtins.int]
|
|
7541
7541
|
"""
|
|
@@ -7543,8 +7543,7 @@ if not MYPY:
|
|
|
7543
7543
|
"""
|
|
7544
7544
|
capture_content_type_header: NotRequired[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgsDict']]
|
|
7545
7545
|
"""
|
|
7546
|
-
|
|
7547
|
-
See `capture_content_type_header` below.
|
|
7546
|
+
Content type headers to capture. See `capture_content_type_header` below.
|
|
7548
7547
|
"""
|
|
7549
7548
|
enable_capture: NotRequired[pulumi.Input[_builtins.bool]]
|
|
7550
7549
|
"""
|
|
@@ -7552,7 +7551,7 @@ if not MYPY:
|
|
|
7552
7551
|
"""
|
|
7553
7552
|
kms_key_id: NotRequired[pulumi.Input[_builtins.str]]
|
|
7554
7553
|
"""
|
|
7555
|
-
|
|
7554
|
+
ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
|
|
7556
7555
|
"""
|
|
7557
7556
|
elif False:
|
|
7558
7557
|
EndpointConfigurationDataCaptureConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7567,13 +7566,12 @@ class EndpointConfigurationDataCaptureConfigArgs:
|
|
|
7567
7566
|
enable_capture: Optional[pulumi.Input[_builtins.bool]] = None,
|
|
7568
7567
|
kms_key_id: Optional[pulumi.Input[_builtins.str]] = None):
|
|
7569
7568
|
"""
|
|
7570
|
-
:param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgs']]] capture_options:
|
|
7571
|
-
:param pulumi.Input[_builtins.str] destination_s3_uri:
|
|
7569
|
+
:param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgs']]] capture_options: What data to capture. Fields are documented below.
|
|
7570
|
+
:param pulumi.Input[_builtins.str] destination_s3_uri: URL for S3 location where the captured data is stored.
|
|
7572
7571
|
:param pulumi.Input[_builtins.int] initial_sampling_percentage: Portion of data to capture. Should be between 0 and 100.
|
|
7573
|
-
:param pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs'] capture_content_type_header:
|
|
7574
|
-
See `capture_content_type_header` below.
|
|
7572
|
+
:param pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs'] capture_content_type_header: Content type headers to capture. See `capture_content_type_header` below.
|
|
7575
7573
|
:param pulumi.Input[_builtins.bool] enable_capture: Flag to enable data capture. Defaults to `false`.
|
|
7576
|
-
:param pulumi.Input[_builtins.str] kms_key_id:
|
|
7574
|
+
:param pulumi.Input[_builtins.str] kms_key_id: ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
|
|
7577
7575
|
"""
|
|
7578
7576
|
pulumi.set(__self__, "capture_options", capture_options)
|
|
7579
7577
|
pulumi.set(__self__, "destination_s3_uri", destination_s3_uri)
|
|
@@ -7589,7 +7587,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
|
|
|
7589
7587
|
@pulumi.getter(name="captureOptions")
|
|
7590
7588
|
def capture_options(self) -> pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureOptionArgs']]]:
|
|
7591
7589
|
"""
|
|
7592
|
-
|
|
7590
|
+
What data to capture. Fields are documented below.
|
|
7593
7591
|
"""
|
|
7594
7592
|
return pulumi.get(self, "capture_options")
|
|
7595
7593
|
|
|
@@ -7601,7 +7599,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
|
|
|
7601
7599
|
@pulumi.getter(name="destinationS3Uri")
|
|
7602
7600
|
def destination_s3_uri(self) -> pulumi.Input[_builtins.str]:
|
|
7603
7601
|
"""
|
|
7604
|
-
|
|
7602
|
+
URL for S3 location where the captured data is stored.
|
|
7605
7603
|
"""
|
|
7606
7604
|
return pulumi.get(self, "destination_s3_uri")
|
|
7607
7605
|
|
|
@@ -7625,8 +7623,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
|
|
|
7625
7623
|
@pulumi.getter(name="captureContentTypeHeader")
|
|
7626
7624
|
def capture_content_type_header(self) -> Optional[pulumi.Input['EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs']]:
|
|
7627
7625
|
"""
|
|
7628
|
-
|
|
7629
|
-
See `capture_content_type_header` below.
|
|
7626
|
+
Content type headers to capture. See `capture_content_type_header` below.
|
|
7630
7627
|
"""
|
|
7631
7628
|
return pulumi.get(self, "capture_content_type_header")
|
|
7632
7629
|
|
|
@@ -7650,7 +7647,7 @@ class EndpointConfigurationDataCaptureConfigArgs:
|
|
|
7650
7647
|
@pulumi.getter(name="kmsKeyId")
|
|
7651
7648
|
def kms_key_id(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7652
7649
|
"""
|
|
7653
|
-
|
|
7650
|
+
ARN of a KMS key that SageMaker AI uses to encrypt the captured data on S3.
|
|
7654
7651
|
"""
|
|
7655
7652
|
return pulumi.get(self, "kms_key_id")
|
|
7656
7653
|
|
|
@@ -7663,13 +7660,11 @@ if not MYPY:
|
|
|
7663
7660
|
class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgsDict(TypedDict):
|
|
7664
7661
|
csv_content_types: NotRequired[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]
|
|
7665
7662
|
"""
|
|
7666
|
-
|
|
7667
|
-
One of `csv_content_types` or `json_content_types` is required.
|
|
7663
|
+
CSV content type headers to capture. One of `csv_content_types` or `json_content_types` is required.
|
|
7668
7664
|
"""
|
|
7669
7665
|
json_content_types: NotRequired[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]
|
|
7670
7666
|
"""
|
|
7671
|
-
The JSON content type headers to capture.
|
|
7672
|
-
One of `json_content_types` or `csv_content_types` is required.
|
|
7667
|
+
The JSON content type headers to capture. One of `json_content_types` or `csv_content_types` is required.
|
|
7673
7668
|
"""
|
|
7674
7669
|
elif False:
|
|
7675
7670
|
EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7680,10 +7675,8 @@ class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs:
|
|
|
7680
7675
|
csv_content_types: Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]] = None,
|
|
7681
7676
|
json_content_types: Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]] = None):
|
|
7682
7677
|
"""
|
|
7683
|
-
:param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] csv_content_types:
|
|
7684
|
-
|
|
7685
|
-
:param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] json_content_types: The JSON content type headers to capture.
|
|
7686
|
-
One of `json_content_types` or `csv_content_types` is required.
|
|
7678
|
+
:param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] csv_content_types: CSV content type headers to capture. One of `csv_content_types` or `json_content_types` is required.
|
|
7679
|
+
:param pulumi.Input[Sequence[pulumi.Input[_builtins.str]]] json_content_types: The JSON content type headers to capture. One of `json_content_types` or `csv_content_types` is required.
|
|
7687
7680
|
"""
|
|
7688
7681
|
if csv_content_types is not None:
|
|
7689
7682
|
pulumi.set(__self__, "csv_content_types", csv_content_types)
|
|
@@ -7694,8 +7687,7 @@ class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs:
|
|
|
7694
7687
|
@pulumi.getter(name="csvContentTypes")
|
|
7695
7688
|
def csv_content_types(self) -> Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]:
|
|
7696
7689
|
"""
|
|
7697
|
-
|
|
7698
|
-
One of `csv_content_types` or `json_content_types` is required.
|
|
7690
|
+
CSV content type headers to capture. One of `csv_content_types` or `json_content_types` is required.
|
|
7699
7691
|
"""
|
|
7700
7692
|
return pulumi.get(self, "csv_content_types")
|
|
7701
7693
|
|
|
@@ -7707,8 +7699,7 @@ class EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs:
|
|
|
7707
7699
|
@pulumi.getter(name="jsonContentTypes")
|
|
7708
7700
|
def json_content_types(self) -> Optional[pulumi.Input[Sequence[pulumi.Input[_builtins.str]]]]:
|
|
7709
7701
|
"""
|
|
7710
|
-
The JSON content type headers to capture.
|
|
7711
|
-
One of `json_content_types` or `csv_content_types` is required.
|
|
7702
|
+
The JSON content type headers to capture. One of `json_content_types` or `csv_content_types` is required.
|
|
7712
7703
|
"""
|
|
7713
7704
|
return pulumi.get(self, "json_content_types")
|
|
7714
7705
|
|
|
@@ -7721,7 +7712,7 @@ if not MYPY:
|
|
|
7721
7712
|
class EndpointConfigurationDataCaptureConfigCaptureOptionArgsDict(TypedDict):
|
|
7722
7713
|
capture_mode: pulumi.Input[_builtins.str]
|
|
7723
7714
|
"""
|
|
7724
|
-
|
|
7715
|
+
Data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
|
|
7725
7716
|
"""
|
|
7726
7717
|
elif False:
|
|
7727
7718
|
EndpointConfigurationDataCaptureConfigCaptureOptionArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7731,7 +7722,7 @@ class EndpointConfigurationDataCaptureConfigCaptureOptionArgs:
|
|
|
7731
7722
|
def __init__(__self__, *,
|
|
7732
7723
|
capture_mode: pulumi.Input[_builtins.str]):
|
|
7733
7724
|
"""
|
|
7734
|
-
:param pulumi.Input[_builtins.str] capture_mode:
|
|
7725
|
+
:param pulumi.Input[_builtins.str] capture_mode: Data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
|
|
7735
7726
|
"""
|
|
7736
7727
|
pulumi.set(__self__, "capture_mode", capture_mode)
|
|
7737
7728
|
|
|
@@ -7739,7 +7730,7 @@ class EndpointConfigurationDataCaptureConfigCaptureOptionArgs:
|
|
|
7739
7730
|
@pulumi.getter(name="captureMode")
|
|
7740
7731
|
def capture_mode(self) -> pulumi.Input[_builtins.str]:
|
|
7741
7732
|
"""
|
|
7742
|
-
|
|
7733
|
+
Data to be captured. Should be one of `Input`, `Output` or `InputAndOutput`.
|
|
7743
7734
|
"""
|
|
7744
7735
|
return pulumi.get(self, "capture_mode")
|
|
7745
7736
|
|
|
@@ -7750,29 +7741,25 @@ class EndpointConfigurationDataCaptureConfigCaptureOptionArgs:
|
|
|
7750
7741
|
|
|
7751
7742
|
if not MYPY:
|
|
7752
7743
|
class EndpointConfigurationProductionVariantArgsDict(TypedDict):
|
|
7753
|
-
model_name: pulumi.Input[_builtins.str]
|
|
7754
|
-
"""
|
|
7755
|
-
The name of the model to use.
|
|
7756
|
-
"""
|
|
7757
7744
|
accelerator_type: NotRequired[pulumi.Input[_builtins.str]]
|
|
7758
7745
|
"""
|
|
7759
|
-
|
|
7746
|
+
Size of the Elastic Inference (EI) instance to use for the production variant.
|
|
7760
7747
|
"""
|
|
7761
7748
|
container_startup_health_check_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
|
|
7762
7749
|
"""
|
|
7763
|
-
|
|
7750
|
+
Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
|
|
7764
7751
|
"""
|
|
7765
7752
|
core_dump_config: NotRequired[pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgsDict']]
|
|
7766
7753
|
"""
|
|
7767
|
-
|
|
7754
|
+
Core dump configuration from the model container when the process crashes. Fields are documented below.
|
|
7768
7755
|
"""
|
|
7769
7756
|
enable_ssm_access: NotRequired[pulumi.Input[_builtins.bool]]
|
|
7770
7757
|
"""
|
|
7771
|
-
|
|
7758
|
+
Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
7772
7759
|
"""
|
|
7773
7760
|
inference_ami_version: NotRequired[pulumi.Input[_builtins.str]]
|
|
7774
7761
|
"""
|
|
7775
|
-
|
|
7762
|
+
Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
|
|
7776
7763
|
"""
|
|
7777
7764
|
initial_instance_count: NotRequired[pulumi.Input[_builtins.int]]
|
|
7778
7765
|
"""
|
|
@@ -7780,35 +7767,39 @@ if not MYPY:
|
|
|
7780
7767
|
"""
|
|
7781
7768
|
initial_variant_weight: NotRequired[pulumi.Input[_builtins.float]]
|
|
7782
7769
|
"""
|
|
7783
|
-
|
|
7770
|
+
Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
7784
7771
|
"""
|
|
7785
7772
|
instance_type: NotRequired[pulumi.Input[_builtins.str]]
|
|
7786
7773
|
"""
|
|
7787
|
-
|
|
7774
|
+
Type of instance to start.
|
|
7788
7775
|
"""
|
|
7789
7776
|
managed_instance_scaling: NotRequired[pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgsDict']]
|
|
7790
7777
|
"""
|
|
7791
|
-
|
|
7778
|
+
Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
|
|
7792
7779
|
"""
|
|
7793
7780
|
model_data_download_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
|
|
7794
7781
|
"""
|
|
7795
|
-
|
|
7782
|
+
Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
|
|
7783
|
+
"""
|
|
7784
|
+
model_name: NotRequired[pulumi.Input[_builtins.str]]
|
|
7785
|
+
"""
|
|
7786
|
+
Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
|
|
7796
7787
|
"""
|
|
7797
7788
|
routing_configs: NotRequired[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgsDict']]]]
|
|
7798
7789
|
"""
|
|
7799
|
-
|
|
7790
|
+
How the endpoint routes incoming traffic. See routing_config below.
|
|
7800
7791
|
"""
|
|
7801
7792
|
serverless_config: NotRequired[pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgsDict']]
|
|
7802
7793
|
"""
|
|
7803
|
-
|
|
7794
|
+
How an endpoint performs asynchronous inference.
|
|
7804
7795
|
"""
|
|
7805
7796
|
variant_name: NotRequired[pulumi.Input[_builtins.str]]
|
|
7806
7797
|
"""
|
|
7807
|
-
|
|
7798
|
+
Name of the variant. If omitted, the provider will assign a random, unique name.
|
|
7808
7799
|
"""
|
|
7809
7800
|
volume_size_in_gb: NotRequired[pulumi.Input[_builtins.int]]
|
|
7810
7801
|
"""
|
|
7811
|
-
|
|
7802
|
+
Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
|
|
7812
7803
|
"""
|
|
7813
7804
|
elif False:
|
|
7814
7805
|
EndpointConfigurationProductionVariantArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -7816,7 +7807,6 @@ elif False:
|
|
|
7816
7807
|
@pulumi.input_type
|
|
7817
7808
|
class EndpointConfigurationProductionVariantArgs:
|
|
7818
7809
|
def __init__(__self__, *,
|
|
7819
|
-
model_name: pulumi.Input[_builtins.str],
|
|
7820
7810
|
accelerator_type: Optional[pulumi.Input[_builtins.str]] = None,
|
|
7821
7811
|
container_startup_health_check_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
|
|
7822
7812
|
core_dump_config: Optional[pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs']] = None,
|
|
@@ -7827,28 +7817,28 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7827
7817
|
instance_type: Optional[pulumi.Input[_builtins.str]] = None,
|
|
7828
7818
|
managed_instance_scaling: Optional[pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs']] = None,
|
|
7829
7819
|
model_data_download_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
|
|
7820
|
+
model_name: Optional[pulumi.Input[_builtins.str]] = None,
|
|
7830
7821
|
routing_configs: Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]]] = None,
|
|
7831
7822
|
serverless_config: Optional[pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs']] = None,
|
|
7832
7823
|
variant_name: Optional[pulumi.Input[_builtins.str]] = None,
|
|
7833
7824
|
volume_size_in_gb: Optional[pulumi.Input[_builtins.int]] = None):
|
|
7834
7825
|
"""
|
|
7835
|
-
:param pulumi.Input[_builtins.str]
|
|
7836
|
-
:param pulumi.Input[_builtins.
|
|
7837
|
-
:param pulumi.Input[
|
|
7838
|
-
:param pulumi.Input[
|
|
7839
|
-
:param pulumi.Input[_builtins.
|
|
7840
|
-
:param pulumi.Input[_builtins.str] inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
|
|
7826
|
+
:param pulumi.Input[_builtins.str] accelerator_type: Size of the Elastic Inference (EI) instance to use for the production variant.
|
|
7827
|
+
:param pulumi.Input[_builtins.int] container_startup_health_check_timeout_in_seconds: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
|
|
7828
|
+
:param pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs'] core_dump_config: Core dump configuration from the model container when the process crashes. Fields are documented below.
|
|
7829
|
+
:param pulumi.Input[_builtins.bool] enable_ssm_access: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
7830
|
+
:param pulumi.Input[_builtins.str] inference_ami_version: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
|
|
7841
7831
|
:param pulumi.Input[_builtins.int] initial_instance_count: Initial number of instances used for auto-scaling.
|
|
7842
|
-
:param pulumi.Input[_builtins.float] initial_variant_weight:
|
|
7843
|
-
:param pulumi.Input[_builtins.str] instance_type:
|
|
7844
|
-
:param pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling:
|
|
7845
|
-
:param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds:
|
|
7846
|
-
:param pulumi.Input[
|
|
7847
|
-
:param pulumi.Input['
|
|
7848
|
-
:param pulumi.Input[
|
|
7849
|
-
:param pulumi.Input[_builtins.
|
|
7850
|
-
|
|
7851
|
-
|
|
7832
|
+
:param pulumi.Input[_builtins.float] initial_variant_weight: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
7833
|
+
:param pulumi.Input[_builtins.str] instance_type: Type of instance to start.
|
|
7834
|
+
:param pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
|
|
7835
|
+
:param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
|
|
7836
|
+
:param pulumi.Input[_builtins.str] model_name: Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
|
|
7837
|
+
:param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]] routing_configs: How the endpoint routes incoming traffic. See routing_config below.
|
|
7838
|
+
:param pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs'] serverless_config: How an endpoint performs asynchronous inference.
|
|
7839
|
+
:param pulumi.Input[_builtins.str] variant_name: Name of the variant. If omitted, the provider will assign a random, unique name.
|
|
7840
|
+
:param pulumi.Input[_builtins.int] volume_size_in_gb: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
|
|
7841
|
+
"""
|
|
7852
7842
|
if accelerator_type is not None:
|
|
7853
7843
|
pulumi.set(__self__, "accelerator_type", accelerator_type)
|
|
7854
7844
|
if container_startup_health_check_timeout_in_seconds is not None:
|
|
@@ -7869,6 +7859,8 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7869
7859
|
pulumi.set(__self__, "managed_instance_scaling", managed_instance_scaling)
|
|
7870
7860
|
if model_data_download_timeout_in_seconds is not None:
|
|
7871
7861
|
pulumi.set(__self__, "model_data_download_timeout_in_seconds", model_data_download_timeout_in_seconds)
|
|
7862
|
+
if model_name is not None:
|
|
7863
|
+
pulumi.set(__self__, "model_name", model_name)
|
|
7872
7864
|
if routing_configs is not None:
|
|
7873
7865
|
pulumi.set(__self__, "routing_configs", routing_configs)
|
|
7874
7866
|
if serverless_config is not None:
|
|
@@ -7878,23 +7870,11 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7878
7870
|
if volume_size_in_gb is not None:
|
|
7879
7871
|
pulumi.set(__self__, "volume_size_in_gb", volume_size_in_gb)
|
|
7880
7872
|
|
|
7881
|
-
@_builtins.property
|
|
7882
|
-
@pulumi.getter(name="modelName")
|
|
7883
|
-
def model_name(self) -> pulumi.Input[_builtins.str]:
|
|
7884
|
-
"""
|
|
7885
|
-
The name of the model to use.
|
|
7886
|
-
"""
|
|
7887
|
-
return pulumi.get(self, "model_name")
|
|
7888
|
-
|
|
7889
|
-
@model_name.setter
|
|
7890
|
-
def model_name(self, value: pulumi.Input[_builtins.str]):
|
|
7891
|
-
pulumi.set(self, "model_name", value)
|
|
7892
|
-
|
|
7893
7873
|
@_builtins.property
|
|
7894
7874
|
@pulumi.getter(name="acceleratorType")
|
|
7895
7875
|
def accelerator_type(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7896
7876
|
"""
|
|
7897
|
-
|
|
7877
|
+
Size of the Elastic Inference (EI) instance to use for the production variant.
|
|
7898
7878
|
"""
|
|
7899
7879
|
return pulumi.get(self, "accelerator_type")
|
|
7900
7880
|
|
|
@@ -7906,7 +7886,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7906
7886
|
@pulumi.getter(name="containerStartupHealthCheckTimeoutInSeconds")
|
|
7907
7887
|
def container_startup_health_check_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
7908
7888
|
"""
|
|
7909
|
-
|
|
7889
|
+
Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
|
|
7910
7890
|
"""
|
|
7911
7891
|
return pulumi.get(self, "container_startup_health_check_timeout_in_seconds")
|
|
7912
7892
|
|
|
@@ -7918,7 +7898,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7918
7898
|
@pulumi.getter(name="coreDumpConfig")
|
|
7919
7899
|
def core_dump_config(self) -> Optional[pulumi.Input['EndpointConfigurationProductionVariantCoreDumpConfigArgs']]:
|
|
7920
7900
|
"""
|
|
7921
|
-
|
|
7901
|
+
Core dump configuration from the model container when the process crashes. Fields are documented below.
|
|
7922
7902
|
"""
|
|
7923
7903
|
return pulumi.get(self, "core_dump_config")
|
|
7924
7904
|
|
|
@@ -7930,7 +7910,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7930
7910
|
@pulumi.getter(name="enableSsmAccess")
|
|
7931
7911
|
def enable_ssm_access(self) -> Optional[pulumi.Input[_builtins.bool]]:
|
|
7932
7912
|
"""
|
|
7933
|
-
|
|
7913
|
+
Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
7934
7914
|
"""
|
|
7935
7915
|
return pulumi.get(self, "enable_ssm_access")
|
|
7936
7916
|
|
|
@@ -7942,7 +7922,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7942
7922
|
@pulumi.getter(name="inferenceAmiVersion")
|
|
7943
7923
|
def inference_ami_version(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7944
7924
|
"""
|
|
7945
|
-
|
|
7925
|
+
Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
|
|
7946
7926
|
"""
|
|
7947
7927
|
return pulumi.get(self, "inference_ami_version")
|
|
7948
7928
|
|
|
@@ -7966,7 +7946,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7966
7946
|
@pulumi.getter(name="initialVariantWeight")
|
|
7967
7947
|
def initial_variant_weight(self) -> Optional[pulumi.Input[_builtins.float]]:
|
|
7968
7948
|
"""
|
|
7969
|
-
|
|
7949
|
+
Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
7970
7950
|
"""
|
|
7971
7951
|
return pulumi.get(self, "initial_variant_weight")
|
|
7972
7952
|
|
|
@@ -7978,7 +7958,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7978
7958
|
@pulumi.getter(name="instanceType")
|
|
7979
7959
|
def instance_type(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7980
7960
|
"""
|
|
7981
|
-
|
|
7961
|
+
Type of instance to start.
|
|
7982
7962
|
"""
|
|
7983
7963
|
return pulumi.get(self, "instance_type")
|
|
7984
7964
|
|
|
@@ -7990,7 +7970,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
7990
7970
|
@pulumi.getter(name="managedInstanceScaling")
|
|
7991
7971
|
def managed_instance_scaling(self) -> Optional[pulumi.Input['EndpointConfigurationProductionVariantManagedInstanceScalingArgs']]:
|
|
7992
7972
|
"""
|
|
7993
|
-
|
|
7973
|
+
Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
|
|
7994
7974
|
"""
|
|
7995
7975
|
return pulumi.get(self, "managed_instance_scaling")
|
|
7996
7976
|
|
|
@@ -8002,7 +7982,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
8002
7982
|
@pulumi.getter(name="modelDataDownloadTimeoutInSeconds")
|
|
8003
7983
|
def model_data_download_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8004
7984
|
"""
|
|
8005
|
-
|
|
7985
|
+
Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
|
|
8006
7986
|
"""
|
|
8007
7987
|
return pulumi.get(self, "model_data_download_timeout_in_seconds")
|
|
8008
7988
|
|
|
@@ -8010,11 +7990,23 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
8010
7990
|
def model_data_download_timeout_in_seconds(self, value: Optional[pulumi.Input[_builtins.int]]):
|
|
8011
7991
|
pulumi.set(self, "model_data_download_timeout_in_seconds", value)
|
|
8012
7992
|
|
|
7993
|
+
@_builtins.property
|
|
7994
|
+
@pulumi.getter(name="modelName")
|
|
7995
|
+
def model_name(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
7996
|
+
"""
|
|
7997
|
+
Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
|
|
7998
|
+
"""
|
|
7999
|
+
return pulumi.get(self, "model_name")
|
|
8000
|
+
|
|
8001
|
+
@model_name.setter
|
|
8002
|
+
def model_name(self, value: Optional[pulumi.Input[_builtins.str]]):
|
|
8003
|
+
pulumi.set(self, "model_name", value)
|
|
8004
|
+
|
|
8013
8005
|
@_builtins.property
|
|
8014
8006
|
@pulumi.getter(name="routingConfigs")
|
|
8015
8007
|
def routing_configs(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationProductionVariantRoutingConfigArgs']]]]:
|
|
8016
8008
|
"""
|
|
8017
|
-
|
|
8009
|
+
How the endpoint routes incoming traffic. See routing_config below.
|
|
8018
8010
|
"""
|
|
8019
8011
|
return pulumi.get(self, "routing_configs")
|
|
8020
8012
|
|
|
@@ -8026,7 +8018,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
8026
8018
|
@pulumi.getter(name="serverlessConfig")
|
|
8027
8019
|
def serverless_config(self) -> Optional[pulumi.Input['EndpointConfigurationProductionVariantServerlessConfigArgs']]:
|
|
8028
8020
|
"""
|
|
8029
|
-
|
|
8021
|
+
How an endpoint performs asynchronous inference.
|
|
8030
8022
|
"""
|
|
8031
8023
|
return pulumi.get(self, "serverless_config")
|
|
8032
8024
|
|
|
@@ -8038,7 +8030,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
8038
8030
|
@pulumi.getter(name="variantName")
|
|
8039
8031
|
def variant_name(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8040
8032
|
"""
|
|
8041
|
-
|
|
8033
|
+
Name of the variant. If omitted, the provider will assign a random, unique name.
|
|
8042
8034
|
"""
|
|
8043
8035
|
return pulumi.get(self, "variant_name")
|
|
8044
8036
|
|
|
@@ -8050,7 +8042,7 @@ class EndpointConfigurationProductionVariantArgs:
|
|
|
8050
8042
|
@pulumi.getter(name="volumeSizeInGb")
|
|
8051
8043
|
def volume_size_in_gb(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8052
8044
|
"""
|
|
8053
|
-
|
|
8045
|
+
Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
|
|
8054
8046
|
"""
|
|
8055
8047
|
return pulumi.get(self, "volume_size_in_gb")
|
|
8056
8048
|
|
|
@@ -8063,11 +8055,11 @@ if not MYPY:
|
|
|
8063
8055
|
class EndpointConfigurationProductionVariantCoreDumpConfigArgsDict(TypedDict):
|
|
8064
8056
|
destination_s3_uri: pulumi.Input[_builtins.str]
|
|
8065
8057
|
"""
|
|
8066
|
-
|
|
8058
|
+
S3 bucket to send the core dump to.
|
|
8067
8059
|
"""
|
|
8068
8060
|
kms_key_id: NotRequired[pulumi.Input[_builtins.str]]
|
|
8069
8061
|
"""
|
|
8070
|
-
|
|
8062
|
+
KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
|
|
8071
8063
|
"""
|
|
8072
8064
|
elif False:
|
|
8073
8065
|
EndpointConfigurationProductionVariantCoreDumpConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8078,8 +8070,8 @@ class EndpointConfigurationProductionVariantCoreDumpConfigArgs:
|
|
|
8078
8070
|
destination_s3_uri: pulumi.Input[_builtins.str],
|
|
8079
8071
|
kms_key_id: Optional[pulumi.Input[_builtins.str]] = None):
|
|
8080
8072
|
"""
|
|
8081
|
-
:param pulumi.Input[_builtins.str] destination_s3_uri:
|
|
8082
|
-
:param pulumi.Input[_builtins.str] kms_key_id:
|
|
8073
|
+
:param pulumi.Input[_builtins.str] destination_s3_uri: S3 bucket to send the core dump to.
|
|
8074
|
+
:param pulumi.Input[_builtins.str] kms_key_id: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
|
|
8083
8075
|
"""
|
|
8084
8076
|
pulumi.set(__self__, "destination_s3_uri", destination_s3_uri)
|
|
8085
8077
|
if kms_key_id is not None:
|
|
@@ -8089,7 +8081,7 @@ class EndpointConfigurationProductionVariantCoreDumpConfigArgs:
|
|
|
8089
8081
|
@pulumi.getter(name="destinationS3Uri")
|
|
8090
8082
|
def destination_s3_uri(self) -> pulumi.Input[_builtins.str]:
|
|
8091
8083
|
"""
|
|
8092
|
-
|
|
8084
|
+
S3 bucket to send the core dump to.
|
|
8093
8085
|
"""
|
|
8094
8086
|
return pulumi.get(self, "destination_s3_uri")
|
|
8095
8087
|
|
|
@@ -8101,7 +8093,7 @@ class EndpointConfigurationProductionVariantCoreDumpConfigArgs:
|
|
|
8101
8093
|
@pulumi.getter(name="kmsKeyId")
|
|
8102
8094
|
def kms_key_id(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8103
8095
|
"""
|
|
8104
|
-
|
|
8096
|
+
KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
|
|
8105
8097
|
"""
|
|
8106
8098
|
return pulumi.get(self, "kms_key_id")
|
|
8107
8099
|
|
|
@@ -8114,15 +8106,15 @@ if not MYPY:
|
|
|
8114
8106
|
class EndpointConfigurationProductionVariantManagedInstanceScalingArgsDict(TypedDict):
|
|
8115
8107
|
max_instance_count: NotRequired[pulumi.Input[_builtins.int]]
|
|
8116
8108
|
"""
|
|
8117
|
-
|
|
8109
|
+
Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
|
|
8118
8110
|
"""
|
|
8119
8111
|
min_instance_count: NotRequired[pulumi.Input[_builtins.int]]
|
|
8120
8112
|
"""
|
|
8121
|
-
|
|
8113
|
+
Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
|
|
8122
8114
|
"""
|
|
8123
8115
|
status: NotRequired[pulumi.Input[_builtins.str]]
|
|
8124
8116
|
"""
|
|
8125
|
-
|
|
8117
|
+
Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
|
|
8126
8118
|
"""
|
|
8127
8119
|
elif False:
|
|
8128
8120
|
EndpointConfigurationProductionVariantManagedInstanceScalingArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8134,9 +8126,9 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
|
|
|
8134
8126
|
min_instance_count: Optional[pulumi.Input[_builtins.int]] = None,
|
|
8135
8127
|
status: Optional[pulumi.Input[_builtins.str]] = None):
|
|
8136
8128
|
"""
|
|
8137
|
-
:param pulumi.Input[_builtins.int] max_instance_count:
|
|
8138
|
-
:param pulumi.Input[_builtins.int] min_instance_count:
|
|
8139
|
-
:param pulumi.Input[_builtins.str] status:
|
|
8129
|
+
:param pulumi.Input[_builtins.int] max_instance_count: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
|
|
8130
|
+
:param pulumi.Input[_builtins.int] min_instance_count: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
|
|
8131
|
+
:param pulumi.Input[_builtins.str] status: Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
|
|
8140
8132
|
"""
|
|
8141
8133
|
if max_instance_count is not None:
|
|
8142
8134
|
pulumi.set(__self__, "max_instance_count", max_instance_count)
|
|
@@ -8149,7 +8141,7 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
|
|
|
8149
8141
|
@pulumi.getter(name="maxInstanceCount")
|
|
8150
8142
|
def max_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8151
8143
|
"""
|
|
8152
|
-
|
|
8144
|
+
Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
|
|
8153
8145
|
"""
|
|
8154
8146
|
return pulumi.get(self, "max_instance_count")
|
|
8155
8147
|
|
|
@@ -8161,7 +8153,7 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
|
|
|
8161
8153
|
@pulumi.getter(name="minInstanceCount")
|
|
8162
8154
|
def min_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8163
8155
|
"""
|
|
8164
|
-
|
|
8156
|
+
Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
|
|
8165
8157
|
"""
|
|
8166
8158
|
return pulumi.get(self, "min_instance_count")
|
|
8167
8159
|
|
|
@@ -8173,7 +8165,7 @@ class EndpointConfigurationProductionVariantManagedInstanceScalingArgs:
|
|
|
8173
8165
|
@pulumi.getter
|
|
8174
8166
|
def status(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8175
8167
|
"""
|
|
8176
|
-
|
|
8168
|
+
Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
|
|
8177
8169
|
"""
|
|
8178
8170
|
return pulumi.get(self, "status")
|
|
8179
8171
|
|
|
@@ -8186,7 +8178,7 @@ if not MYPY:
|
|
|
8186
8178
|
class EndpointConfigurationProductionVariantRoutingConfigArgsDict(TypedDict):
|
|
8187
8179
|
routing_strategy: pulumi.Input[_builtins.str]
|
|
8188
8180
|
"""
|
|
8189
|
-
|
|
8181
|
+
How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
|
|
8190
8182
|
"""
|
|
8191
8183
|
elif False:
|
|
8192
8184
|
EndpointConfigurationProductionVariantRoutingConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8196,7 +8188,7 @@ class EndpointConfigurationProductionVariantRoutingConfigArgs:
|
|
|
8196
8188
|
def __init__(__self__, *,
|
|
8197
8189
|
routing_strategy: pulumi.Input[_builtins.str]):
|
|
8198
8190
|
"""
|
|
8199
|
-
:param pulumi.Input[_builtins.str] routing_strategy:
|
|
8191
|
+
:param pulumi.Input[_builtins.str] routing_strategy: How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
|
|
8200
8192
|
"""
|
|
8201
8193
|
pulumi.set(__self__, "routing_strategy", routing_strategy)
|
|
8202
8194
|
|
|
@@ -8204,7 +8196,7 @@ class EndpointConfigurationProductionVariantRoutingConfigArgs:
|
|
|
8204
8196
|
@pulumi.getter(name="routingStrategy")
|
|
8205
8197
|
def routing_strategy(self) -> pulumi.Input[_builtins.str]:
|
|
8206
8198
|
"""
|
|
8207
|
-
|
|
8199
|
+
How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
|
|
8208
8200
|
"""
|
|
8209
8201
|
return pulumi.get(self, "routing_strategy")
|
|
8210
8202
|
|
|
@@ -8217,15 +8209,15 @@ if not MYPY:
|
|
|
8217
8209
|
class EndpointConfigurationProductionVariantServerlessConfigArgsDict(TypedDict):
|
|
8218
8210
|
max_concurrency: pulumi.Input[_builtins.int]
|
|
8219
8211
|
"""
|
|
8220
|
-
|
|
8212
|
+
Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
|
|
8221
8213
|
"""
|
|
8222
8214
|
memory_size_in_mb: pulumi.Input[_builtins.int]
|
|
8223
8215
|
"""
|
|
8224
|
-
|
|
8216
|
+
Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
|
|
8225
8217
|
"""
|
|
8226
8218
|
provisioned_concurrency: NotRequired[pulumi.Input[_builtins.int]]
|
|
8227
8219
|
"""
|
|
8228
|
-
|
|
8220
|
+
Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
|
|
8229
8221
|
"""
|
|
8230
8222
|
elif False:
|
|
8231
8223
|
EndpointConfigurationProductionVariantServerlessConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8237,9 +8229,9 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
|
|
|
8237
8229
|
memory_size_in_mb: pulumi.Input[_builtins.int],
|
|
8238
8230
|
provisioned_concurrency: Optional[pulumi.Input[_builtins.int]] = None):
|
|
8239
8231
|
"""
|
|
8240
|
-
:param pulumi.Input[_builtins.int] max_concurrency:
|
|
8241
|
-
:param pulumi.Input[_builtins.int] memory_size_in_mb:
|
|
8242
|
-
:param pulumi.Input[_builtins.int] provisioned_concurrency:
|
|
8232
|
+
:param pulumi.Input[_builtins.int] max_concurrency: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
|
|
8233
|
+
:param pulumi.Input[_builtins.int] memory_size_in_mb: Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
|
|
8234
|
+
:param pulumi.Input[_builtins.int] provisioned_concurrency: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
|
|
8243
8235
|
"""
|
|
8244
8236
|
pulumi.set(__self__, "max_concurrency", max_concurrency)
|
|
8245
8237
|
pulumi.set(__self__, "memory_size_in_mb", memory_size_in_mb)
|
|
@@ -8250,7 +8242,7 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
|
|
|
8250
8242
|
@pulumi.getter(name="maxConcurrency")
|
|
8251
8243
|
def max_concurrency(self) -> pulumi.Input[_builtins.int]:
|
|
8252
8244
|
"""
|
|
8253
|
-
|
|
8245
|
+
Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
|
|
8254
8246
|
"""
|
|
8255
8247
|
return pulumi.get(self, "max_concurrency")
|
|
8256
8248
|
|
|
@@ -8262,7 +8254,7 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
|
|
|
8262
8254
|
@pulumi.getter(name="memorySizeInMb")
|
|
8263
8255
|
def memory_size_in_mb(self) -> pulumi.Input[_builtins.int]:
|
|
8264
8256
|
"""
|
|
8265
|
-
|
|
8257
|
+
Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
|
|
8266
8258
|
"""
|
|
8267
8259
|
return pulumi.get(self, "memory_size_in_mb")
|
|
8268
8260
|
|
|
@@ -8274,7 +8266,7 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
|
|
|
8274
8266
|
@pulumi.getter(name="provisionedConcurrency")
|
|
8275
8267
|
def provisioned_concurrency(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8276
8268
|
"""
|
|
8277
|
-
|
|
8269
|
+
Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
|
|
8278
8270
|
"""
|
|
8279
8271
|
return pulumi.get(self, "provisioned_concurrency")
|
|
8280
8272
|
|
|
@@ -8285,29 +8277,25 @@ class EndpointConfigurationProductionVariantServerlessConfigArgs:
|
|
|
8285
8277
|
|
|
8286
8278
|
if not MYPY:
|
|
8287
8279
|
class EndpointConfigurationShadowProductionVariantArgsDict(TypedDict):
|
|
8288
|
-
model_name: pulumi.Input[_builtins.str]
|
|
8289
|
-
"""
|
|
8290
|
-
The name of the model to use.
|
|
8291
|
-
"""
|
|
8292
8280
|
accelerator_type: NotRequired[pulumi.Input[_builtins.str]]
|
|
8293
8281
|
"""
|
|
8294
|
-
|
|
8282
|
+
Size of the Elastic Inference (EI) instance to use for the production variant.
|
|
8295
8283
|
"""
|
|
8296
8284
|
container_startup_health_check_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
|
|
8297
8285
|
"""
|
|
8298
|
-
|
|
8286
|
+
Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
|
|
8299
8287
|
"""
|
|
8300
8288
|
core_dump_config: NotRequired[pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgsDict']]
|
|
8301
8289
|
"""
|
|
8302
|
-
|
|
8290
|
+
Core dump configuration from the model container when the process crashes. Fields are documented below.
|
|
8303
8291
|
"""
|
|
8304
8292
|
enable_ssm_access: NotRequired[pulumi.Input[_builtins.bool]]
|
|
8305
8293
|
"""
|
|
8306
|
-
|
|
8294
|
+
Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
8307
8295
|
"""
|
|
8308
8296
|
inference_ami_version: NotRequired[pulumi.Input[_builtins.str]]
|
|
8309
8297
|
"""
|
|
8310
|
-
|
|
8298
|
+
Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
|
|
8311
8299
|
"""
|
|
8312
8300
|
initial_instance_count: NotRequired[pulumi.Input[_builtins.int]]
|
|
8313
8301
|
"""
|
|
@@ -8315,35 +8303,39 @@ if not MYPY:
|
|
|
8315
8303
|
"""
|
|
8316
8304
|
initial_variant_weight: NotRequired[pulumi.Input[_builtins.float]]
|
|
8317
8305
|
"""
|
|
8318
|
-
|
|
8306
|
+
Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
8319
8307
|
"""
|
|
8320
8308
|
instance_type: NotRequired[pulumi.Input[_builtins.str]]
|
|
8321
8309
|
"""
|
|
8322
|
-
|
|
8310
|
+
Type of instance to start.
|
|
8323
8311
|
"""
|
|
8324
8312
|
managed_instance_scaling: NotRequired[pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgsDict']]
|
|
8325
8313
|
"""
|
|
8326
|
-
|
|
8314
|
+
Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
|
|
8327
8315
|
"""
|
|
8328
8316
|
model_data_download_timeout_in_seconds: NotRequired[pulumi.Input[_builtins.int]]
|
|
8329
8317
|
"""
|
|
8330
|
-
|
|
8318
|
+
Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
|
|
8319
|
+
"""
|
|
8320
|
+
model_name: NotRequired[pulumi.Input[_builtins.str]]
|
|
8321
|
+
"""
|
|
8322
|
+
Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
|
|
8331
8323
|
"""
|
|
8332
8324
|
routing_configs: NotRequired[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgsDict']]]]
|
|
8333
8325
|
"""
|
|
8334
|
-
|
|
8326
|
+
How the endpoint routes incoming traffic. See routing_config below.
|
|
8335
8327
|
"""
|
|
8336
8328
|
serverless_config: NotRequired[pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgsDict']]
|
|
8337
8329
|
"""
|
|
8338
|
-
|
|
8330
|
+
How an endpoint performs asynchronous inference.
|
|
8339
8331
|
"""
|
|
8340
8332
|
variant_name: NotRequired[pulumi.Input[_builtins.str]]
|
|
8341
8333
|
"""
|
|
8342
|
-
|
|
8334
|
+
Name of the variant. If omitted, the provider will assign a random, unique name.
|
|
8343
8335
|
"""
|
|
8344
8336
|
volume_size_in_gb: NotRequired[pulumi.Input[_builtins.int]]
|
|
8345
8337
|
"""
|
|
8346
|
-
|
|
8338
|
+
Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
|
|
8347
8339
|
"""
|
|
8348
8340
|
elif False:
|
|
8349
8341
|
EndpointConfigurationShadowProductionVariantArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8351,7 +8343,6 @@ elif False:
|
|
|
8351
8343
|
@pulumi.input_type
|
|
8352
8344
|
class EndpointConfigurationShadowProductionVariantArgs:
|
|
8353
8345
|
def __init__(__self__, *,
|
|
8354
|
-
model_name: pulumi.Input[_builtins.str],
|
|
8355
8346
|
accelerator_type: Optional[pulumi.Input[_builtins.str]] = None,
|
|
8356
8347
|
container_startup_health_check_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
|
|
8357
8348
|
core_dump_config: Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs']] = None,
|
|
@@ -8362,28 +8353,28 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8362
8353
|
instance_type: Optional[pulumi.Input[_builtins.str]] = None,
|
|
8363
8354
|
managed_instance_scaling: Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs']] = None,
|
|
8364
8355
|
model_data_download_timeout_in_seconds: Optional[pulumi.Input[_builtins.int]] = None,
|
|
8356
|
+
model_name: Optional[pulumi.Input[_builtins.str]] = None,
|
|
8365
8357
|
routing_configs: Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]]] = None,
|
|
8366
8358
|
serverless_config: Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs']] = None,
|
|
8367
8359
|
variant_name: Optional[pulumi.Input[_builtins.str]] = None,
|
|
8368
8360
|
volume_size_in_gb: Optional[pulumi.Input[_builtins.int]] = None):
|
|
8369
8361
|
"""
|
|
8370
|
-
:param pulumi.Input[_builtins.str]
|
|
8371
|
-
:param pulumi.Input[_builtins.
|
|
8372
|
-
:param pulumi.Input[
|
|
8373
|
-
:param pulumi.Input[
|
|
8374
|
-
:param pulumi.Input[_builtins.
|
|
8375
|
-
:param pulumi.Input[_builtins.str] inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
|
|
8362
|
+
:param pulumi.Input[_builtins.str] accelerator_type: Size of the Elastic Inference (EI) instance to use for the production variant.
|
|
8363
|
+
:param pulumi.Input[_builtins.int] container_startup_health_check_timeout_in_seconds: Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
|
|
8364
|
+
:param pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs'] core_dump_config: Core dump configuration from the model container when the process crashes. Fields are documented below.
|
|
8365
|
+
:param pulumi.Input[_builtins.bool] enable_ssm_access: Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
8366
|
+
:param pulumi.Input[_builtins.str] inference_ami_version: Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
|
|
8376
8367
|
:param pulumi.Input[_builtins.int] initial_instance_count: Initial number of instances used for auto-scaling.
|
|
8377
|
-
:param pulumi.Input[_builtins.float] initial_variant_weight:
|
|
8378
|
-
:param pulumi.Input[_builtins.str] instance_type:
|
|
8379
|
-
:param pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling:
|
|
8380
|
-
:param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds:
|
|
8381
|
-
:param pulumi.Input[
|
|
8382
|
-
:param pulumi.Input['
|
|
8383
|
-
:param pulumi.Input[
|
|
8384
|
-
:param pulumi.Input[_builtins.
|
|
8385
|
-
|
|
8386
|
-
|
|
8368
|
+
:param pulumi.Input[_builtins.float] initial_variant_weight: Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
8369
|
+
:param pulumi.Input[_builtins.str] instance_type: Type of instance to start.
|
|
8370
|
+
:param pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs'] managed_instance_scaling: Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
|
|
8371
|
+
:param pulumi.Input[_builtins.int] model_data_download_timeout_in_seconds: Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
|
|
8372
|
+
:param pulumi.Input[_builtins.str] model_name: Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
|
|
8373
|
+
:param pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]] routing_configs: How the endpoint routes incoming traffic. See routing_config below.
|
|
8374
|
+
:param pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs'] serverless_config: How an endpoint performs asynchronous inference.
|
|
8375
|
+
:param pulumi.Input[_builtins.str] variant_name: Name of the variant. If omitted, the provider will assign a random, unique name.
|
|
8376
|
+
:param pulumi.Input[_builtins.int] volume_size_in_gb: Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
|
|
8377
|
+
"""
|
|
8387
8378
|
if accelerator_type is not None:
|
|
8388
8379
|
pulumi.set(__self__, "accelerator_type", accelerator_type)
|
|
8389
8380
|
if container_startup_health_check_timeout_in_seconds is not None:
|
|
@@ -8404,6 +8395,8 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8404
8395
|
pulumi.set(__self__, "managed_instance_scaling", managed_instance_scaling)
|
|
8405
8396
|
if model_data_download_timeout_in_seconds is not None:
|
|
8406
8397
|
pulumi.set(__self__, "model_data_download_timeout_in_seconds", model_data_download_timeout_in_seconds)
|
|
8398
|
+
if model_name is not None:
|
|
8399
|
+
pulumi.set(__self__, "model_name", model_name)
|
|
8407
8400
|
if routing_configs is not None:
|
|
8408
8401
|
pulumi.set(__self__, "routing_configs", routing_configs)
|
|
8409
8402
|
if serverless_config is not None:
|
|
@@ -8413,23 +8406,11 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8413
8406
|
if volume_size_in_gb is not None:
|
|
8414
8407
|
pulumi.set(__self__, "volume_size_in_gb", volume_size_in_gb)
|
|
8415
8408
|
|
|
8416
|
-
@_builtins.property
|
|
8417
|
-
@pulumi.getter(name="modelName")
|
|
8418
|
-
def model_name(self) -> pulumi.Input[_builtins.str]:
|
|
8419
|
-
"""
|
|
8420
|
-
The name of the model to use.
|
|
8421
|
-
"""
|
|
8422
|
-
return pulumi.get(self, "model_name")
|
|
8423
|
-
|
|
8424
|
-
@model_name.setter
|
|
8425
|
-
def model_name(self, value: pulumi.Input[_builtins.str]):
|
|
8426
|
-
pulumi.set(self, "model_name", value)
|
|
8427
|
-
|
|
8428
8409
|
@_builtins.property
|
|
8429
8410
|
@pulumi.getter(name="acceleratorType")
|
|
8430
8411
|
def accelerator_type(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8431
8412
|
"""
|
|
8432
|
-
|
|
8413
|
+
Size of the Elastic Inference (EI) instance to use for the production variant.
|
|
8433
8414
|
"""
|
|
8434
8415
|
return pulumi.get(self, "accelerator_type")
|
|
8435
8416
|
|
|
@@ -8441,7 +8422,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8441
8422
|
@pulumi.getter(name="containerStartupHealthCheckTimeoutInSeconds")
|
|
8442
8423
|
def container_startup_health_check_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8443
8424
|
"""
|
|
8444
|
-
|
|
8425
|
+
Timeout value, in seconds, for your inference container to pass health check by SageMaker AI Hosting. For more information about health check, see [How Your Container Should Respond to Health Check (Ping) Requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests). Valid values between `60` and `3600`.
|
|
8445
8426
|
"""
|
|
8446
8427
|
return pulumi.get(self, "container_startup_health_check_timeout_in_seconds")
|
|
8447
8428
|
|
|
@@ -8453,7 +8434,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8453
8434
|
@pulumi.getter(name="coreDumpConfig")
|
|
8454
8435
|
def core_dump_config(self) -> Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs']]:
|
|
8455
8436
|
"""
|
|
8456
|
-
|
|
8437
|
+
Core dump configuration from the model container when the process crashes. Fields are documented below.
|
|
8457
8438
|
"""
|
|
8458
8439
|
return pulumi.get(self, "core_dump_config")
|
|
8459
8440
|
|
|
@@ -8465,7 +8446,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8465
8446
|
@pulumi.getter(name="enableSsmAccess")
|
|
8466
8447
|
def enable_ssm_access(self) -> Optional[pulumi.Input[_builtins.bool]]:
|
|
8467
8448
|
"""
|
|
8468
|
-
|
|
8449
|
+
Whether to turn on native AWS SSM access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind endpoints. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
8469
8450
|
"""
|
|
8470
8451
|
return pulumi.get(self, "enable_ssm_access")
|
|
8471
8452
|
|
|
@@ -8477,7 +8458,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8477
8458
|
@pulumi.getter(name="inferenceAmiVersion")
|
|
8478
8459
|
def inference_ami_version(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8479
8460
|
"""
|
|
8480
|
-
|
|
8461
|
+
Option from a collection of preconfigured AMI images. Each image is configured by AWS with a set of software and driver versions. AWS optimizes these configurations for different machine learning workloads.
|
|
8481
8462
|
"""
|
|
8482
8463
|
return pulumi.get(self, "inference_ami_version")
|
|
8483
8464
|
|
|
@@ -8501,7 +8482,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8501
8482
|
@pulumi.getter(name="initialVariantWeight")
|
|
8502
8483
|
def initial_variant_weight(self) -> Optional[pulumi.Input[_builtins.float]]:
|
|
8503
8484
|
"""
|
|
8504
|
-
|
|
8485
|
+
Initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, defaults to `1.0`. Ignored if `model_name` is not set (Inference Components endpoint).
|
|
8505
8486
|
"""
|
|
8506
8487
|
return pulumi.get(self, "initial_variant_weight")
|
|
8507
8488
|
|
|
@@ -8513,7 +8494,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8513
8494
|
@pulumi.getter(name="instanceType")
|
|
8514
8495
|
def instance_type(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8515
8496
|
"""
|
|
8516
|
-
|
|
8497
|
+
Type of instance to start.
|
|
8517
8498
|
"""
|
|
8518
8499
|
return pulumi.get(self, "instance_type")
|
|
8519
8500
|
|
|
@@ -8525,7 +8506,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8525
8506
|
@pulumi.getter(name="managedInstanceScaling")
|
|
8526
8507
|
def managed_instance_scaling(self) -> Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs']]:
|
|
8527
8508
|
"""
|
|
8528
|
-
|
|
8509
|
+
Control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
|
|
8529
8510
|
"""
|
|
8530
8511
|
return pulumi.get(self, "managed_instance_scaling")
|
|
8531
8512
|
|
|
@@ -8537,7 +8518,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8537
8518
|
@pulumi.getter(name="modelDataDownloadTimeoutInSeconds")
|
|
8538
8519
|
def model_data_download_timeout_in_seconds(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8539
8520
|
"""
|
|
8540
|
-
|
|
8521
|
+
Timeout value, in seconds, to download and extract the model that you want to host from S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
|
|
8541
8522
|
"""
|
|
8542
8523
|
return pulumi.get(self, "model_data_download_timeout_in_seconds")
|
|
8543
8524
|
|
|
@@ -8545,11 +8526,23 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8545
8526
|
def model_data_download_timeout_in_seconds(self, value: Optional[pulumi.Input[_builtins.int]]):
|
|
8546
8527
|
pulumi.set(self, "model_data_download_timeout_in_seconds", value)
|
|
8547
8528
|
|
|
8529
|
+
@_builtins.property
|
|
8530
|
+
@pulumi.getter(name="modelName")
|
|
8531
|
+
def model_name(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8532
|
+
"""
|
|
8533
|
+
Name of the model to use. Required unless using Inference Components (in which case `execution_role_arn` must be specified at the endpoint configuration level).
|
|
8534
|
+
"""
|
|
8535
|
+
return pulumi.get(self, "model_name")
|
|
8536
|
+
|
|
8537
|
+
@model_name.setter
|
|
8538
|
+
def model_name(self, value: Optional[pulumi.Input[_builtins.str]]):
|
|
8539
|
+
pulumi.set(self, "model_name", value)
|
|
8540
|
+
|
|
8548
8541
|
@_builtins.property
|
|
8549
8542
|
@pulumi.getter(name="routingConfigs")
|
|
8550
8543
|
def routing_configs(self) -> Optional[pulumi.Input[Sequence[pulumi.Input['EndpointConfigurationShadowProductionVariantRoutingConfigArgs']]]]:
|
|
8551
8544
|
"""
|
|
8552
|
-
|
|
8545
|
+
How the endpoint routes incoming traffic. See routing_config below.
|
|
8553
8546
|
"""
|
|
8554
8547
|
return pulumi.get(self, "routing_configs")
|
|
8555
8548
|
|
|
@@ -8561,7 +8554,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8561
8554
|
@pulumi.getter(name="serverlessConfig")
|
|
8562
8555
|
def serverless_config(self) -> Optional[pulumi.Input['EndpointConfigurationShadowProductionVariantServerlessConfigArgs']]:
|
|
8563
8556
|
"""
|
|
8564
|
-
|
|
8557
|
+
How an endpoint performs asynchronous inference.
|
|
8565
8558
|
"""
|
|
8566
8559
|
return pulumi.get(self, "serverless_config")
|
|
8567
8560
|
|
|
@@ -8573,7 +8566,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8573
8566
|
@pulumi.getter(name="variantName")
|
|
8574
8567
|
def variant_name(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8575
8568
|
"""
|
|
8576
|
-
|
|
8569
|
+
Name of the variant. If omitted, the provider will assign a random, unique name.
|
|
8577
8570
|
"""
|
|
8578
8571
|
return pulumi.get(self, "variant_name")
|
|
8579
8572
|
|
|
@@ -8585,7 +8578,7 @@ class EndpointConfigurationShadowProductionVariantArgs:
|
|
|
8585
8578
|
@pulumi.getter(name="volumeSizeInGb")
|
|
8586
8579
|
def volume_size_in_gb(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8587
8580
|
"""
|
|
8588
|
-
|
|
8581
|
+
Size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
|
|
8589
8582
|
"""
|
|
8590
8583
|
return pulumi.get(self, "volume_size_in_gb")
|
|
8591
8584
|
|
|
@@ -8598,11 +8591,11 @@ if not MYPY:
|
|
|
8598
8591
|
class EndpointConfigurationShadowProductionVariantCoreDumpConfigArgsDict(TypedDict):
|
|
8599
8592
|
destination_s3_uri: pulumi.Input[_builtins.str]
|
|
8600
8593
|
"""
|
|
8601
|
-
|
|
8594
|
+
S3 bucket to send the core dump to.
|
|
8602
8595
|
"""
|
|
8603
8596
|
kms_key_id: pulumi.Input[_builtins.str]
|
|
8604
8597
|
"""
|
|
8605
|
-
|
|
8598
|
+
KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
|
|
8606
8599
|
"""
|
|
8607
8600
|
elif False:
|
|
8608
8601
|
EndpointConfigurationShadowProductionVariantCoreDumpConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8613,8 +8606,8 @@ class EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs:
|
|
|
8613
8606
|
destination_s3_uri: pulumi.Input[_builtins.str],
|
|
8614
8607
|
kms_key_id: pulumi.Input[_builtins.str]):
|
|
8615
8608
|
"""
|
|
8616
|
-
:param pulumi.Input[_builtins.str] destination_s3_uri:
|
|
8617
|
-
:param pulumi.Input[_builtins.str] kms_key_id:
|
|
8609
|
+
:param pulumi.Input[_builtins.str] destination_s3_uri: S3 bucket to send the core dump to.
|
|
8610
|
+
:param pulumi.Input[_builtins.str] kms_key_id: KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
|
|
8618
8611
|
"""
|
|
8619
8612
|
pulumi.set(__self__, "destination_s3_uri", destination_s3_uri)
|
|
8620
8613
|
pulumi.set(__self__, "kms_key_id", kms_key_id)
|
|
@@ -8623,7 +8616,7 @@ class EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs:
|
|
|
8623
8616
|
@pulumi.getter(name="destinationS3Uri")
|
|
8624
8617
|
def destination_s3_uri(self) -> pulumi.Input[_builtins.str]:
|
|
8625
8618
|
"""
|
|
8626
|
-
|
|
8619
|
+
S3 bucket to send the core dump to.
|
|
8627
8620
|
"""
|
|
8628
8621
|
return pulumi.get(self, "destination_s3_uri")
|
|
8629
8622
|
|
|
@@ -8635,7 +8628,7 @@ class EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs:
|
|
|
8635
8628
|
@pulumi.getter(name="kmsKeyId")
|
|
8636
8629
|
def kms_key_id(self) -> pulumi.Input[_builtins.str]:
|
|
8637
8630
|
"""
|
|
8638
|
-
|
|
8631
|
+
KMS key that SageMaker AI uses to encrypt the core dump data at rest using S3 server-side encryption.
|
|
8639
8632
|
"""
|
|
8640
8633
|
return pulumi.get(self, "kms_key_id")
|
|
8641
8634
|
|
|
@@ -8648,15 +8641,15 @@ if not MYPY:
|
|
|
8648
8641
|
class EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgsDict(TypedDict):
|
|
8649
8642
|
max_instance_count: NotRequired[pulumi.Input[_builtins.int]]
|
|
8650
8643
|
"""
|
|
8651
|
-
|
|
8644
|
+
Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
|
|
8652
8645
|
"""
|
|
8653
8646
|
min_instance_count: NotRequired[pulumi.Input[_builtins.int]]
|
|
8654
8647
|
"""
|
|
8655
|
-
|
|
8648
|
+
Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
|
|
8656
8649
|
"""
|
|
8657
8650
|
status: NotRequired[pulumi.Input[_builtins.str]]
|
|
8658
8651
|
"""
|
|
8659
|
-
|
|
8652
|
+
Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
|
|
8660
8653
|
"""
|
|
8661
8654
|
elif False:
|
|
8662
8655
|
EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8668,9 +8661,9 @@ class EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs:
|
|
|
8668
8661
|
min_instance_count: Optional[pulumi.Input[_builtins.int]] = None,
|
|
8669
8662
|
status: Optional[pulumi.Input[_builtins.str]] = None):
|
|
8670
8663
|
"""
|
|
8671
|
-
:param pulumi.Input[_builtins.int] max_instance_count:
|
|
8672
|
-
:param pulumi.Input[_builtins.int] min_instance_count:
|
|
8673
|
-
:param pulumi.Input[_builtins.str] status:
|
|
8664
|
+
:param pulumi.Input[_builtins.int] max_instance_count: Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
|
|
8665
|
+
:param pulumi.Input[_builtins.int] min_instance_count: Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
|
|
8666
|
+
:param pulumi.Input[_builtins.str] status: Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
|
|
8674
8667
|
"""
|
|
8675
8668
|
if max_instance_count is not None:
|
|
8676
8669
|
pulumi.set(__self__, "max_instance_count", max_instance_count)
|
|
@@ -8683,7 +8676,7 @@ class EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs:
|
|
|
8683
8676
|
@pulumi.getter(name="maxInstanceCount")
|
|
8684
8677
|
def max_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8685
8678
|
"""
|
|
8686
|
-
|
|
8679
|
+
Maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
|
|
8687
8680
|
"""
|
|
8688
8681
|
return pulumi.get(self, "max_instance_count")
|
|
8689
8682
|
|
|
@@ -8695,7 +8688,7 @@ class EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs:
|
|
|
8695
8688
|
@pulumi.getter(name="minInstanceCount")
|
|
8696
8689
|
def min_instance_count(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8697
8690
|
"""
|
|
8698
|
-
|
|
8691
|
+
Minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
|
|
8699
8692
|
"""
|
|
8700
8693
|
return pulumi.get(self, "min_instance_count")
|
|
8701
8694
|
|
|
@@ -8707,7 +8700,7 @@ class EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs:
|
|
|
8707
8700
|
@pulumi.getter
|
|
8708
8701
|
def status(self) -> Optional[pulumi.Input[_builtins.str]]:
|
|
8709
8702
|
"""
|
|
8710
|
-
|
|
8703
|
+
Whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
|
|
8711
8704
|
"""
|
|
8712
8705
|
return pulumi.get(self, "status")
|
|
8713
8706
|
|
|
@@ -8720,7 +8713,7 @@ if not MYPY:
|
|
|
8720
8713
|
class EndpointConfigurationShadowProductionVariantRoutingConfigArgsDict(TypedDict):
|
|
8721
8714
|
routing_strategy: pulumi.Input[_builtins.str]
|
|
8722
8715
|
"""
|
|
8723
|
-
|
|
8716
|
+
How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
|
|
8724
8717
|
"""
|
|
8725
8718
|
elif False:
|
|
8726
8719
|
EndpointConfigurationShadowProductionVariantRoutingConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8730,7 +8723,7 @@ class EndpointConfigurationShadowProductionVariantRoutingConfigArgs:
|
|
|
8730
8723
|
def __init__(__self__, *,
|
|
8731
8724
|
routing_strategy: pulumi.Input[_builtins.str]):
|
|
8732
8725
|
"""
|
|
8733
|
-
:param pulumi.Input[_builtins.str] routing_strategy:
|
|
8726
|
+
:param pulumi.Input[_builtins.str] routing_strategy: How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
|
|
8734
8727
|
"""
|
|
8735
8728
|
pulumi.set(__self__, "routing_strategy", routing_strategy)
|
|
8736
8729
|
|
|
@@ -8738,7 +8731,7 @@ class EndpointConfigurationShadowProductionVariantRoutingConfigArgs:
|
|
|
8738
8731
|
@pulumi.getter(name="routingStrategy")
|
|
8739
8732
|
def routing_strategy(self) -> pulumi.Input[_builtins.str]:
|
|
8740
8733
|
"""
|
|
8741
|
-
|
|
8734
|
+
How the endpoint routes incoming traffic. Valid values are `LEAST_OUTSTANDING_REQUESTS` and `RANDOM`. `LEAST_OUTSTANDING_REQUESTS` routes requests to the specific instances that have more capacity to process them. `RANDOM` routes each request to a randomly chosen instance.
|
|
8742
8735
|
"""
|
|
8743
8736
|
return pulumi.get(self, "routing_strategy")
|
|
8744
8737
|
|
|
@@ -8751,15 +8744,15 @@ if not MYPY:
|
|
|
8751
8744
|
class EndpointConfigurationShadowProductionVariantServerlessConfigArgsDict(TypedDict):
|
|
8752
8745
|
max_concurrency: pulumi.Input[_builtins.int]
|
|
8753
8746
|
"""
|
|
8754
|
-
|
|
8747
|
+
Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
|
|
8755
8748
|
"""
|
|
8756
8749
|
memory_size_in_mb: pulumi.Input[_builtins.int]
|
|
8757
8750
|
"""
|
|
8758
|
-
|
|
8751
|
+
Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
|
|
8759
8752
|
"""
|
|
8760
8753
|
provisioned_concurrency: NotRequired[pulumi.Input[_builtins.int]]
|
|
8761
8754
|
"""
|
|
8762
|
-
|
|
8755
|
+
Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
|
|
8763
8756
|
"""
|
|
8764
8757
|
elif False:
|
|
8765
8758
|
EndpointConfigurationShadowProductionVariantServerlessConfigArgsDict: TypeAlias = Mapping[str, Any]
|
|
@@ -8771,9 +8764,9 @@ class EndpointConfigurationShadowProductionVariantServerlessConfigArgs:
|
|
|
8771
8764
|
memory_size_in_mb: pulumi.Input[_builtins.int],
|
|
8772
8765
|
provisioned_concurrency: Optional[pulumi.Input[_builtins.int]] = None):
|
|
8773
8766
|
"""
|
|
8774
|
-
:param pulumi.Input[_builtins.int] max_concurrency:
|
|
8775
|
-
:param pulumi.Input[_builtins.int] memory_size_in_mb:
|
|
8776
|
-
:param pulumi.Input[_builtins.int] provisioned_concurrency:
|
|
8767
|
+
:param pulumi.Input[_builtins.int] max_concurrency: Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
|
|
8768
|
+
:param pulumi.Input[_builtins.int] memory_size_in_mb: Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
|
|
8769
|
+
:param pulumi.Input[_builtins.int] provisioned_concurrency: Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
|
|
8777
8770
|
"""
|
|
8778
8771
|
pulumi.set(__self__, "max_concurrency", max_concurrency)
|
|
8779
8772
|
pulumi.set(__self__, "memory_size_in_mb", memory_size_in_mb)
|
|
@@ -8784,7 +8777,7 @@ class EndpointConfigurationShadowProductionVariantServerlessConfigArgs:
|
|
|
8784
8777
|
@pulumi.getter(name="maxConcurrency")
|
|
8785
8778
|
def max_concurrency(self) -> pulumi.Input[_builtins.int]:
|
|
8786
8779
|
"""
|
|
8787
|
-
|
|
8780
|
+
Maximum number of concurrent invocations your serverless endpoint can process. Valid values are between `1` and `200`.
|
|
8788
8781
|
"""
|
|
8789
8782
|
return pulumi.get(self, "max_concurrency")
|
|
8790
8783
|
|
|
@@ -8796,7 +8789,7 @@ class EndpointConfigurationShadowProductionVariantServerlessConfigArgs:
|
|
|
8796
8789
|
@pulumi.getter(name="memorySizeInMb")
|
|
8797
8790
|
def memory_size_in_mb(self) -> pulumi.Input[_builtins.int]:
|
|
8798
8791
|
"""
|
|
8799
|
-
|
|
8792
|
+
Memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
|
|
8800
8793
|
"""
|
|
8801
8794
|
return pulumi.get(self, "memory_size_in_mb")
|
|
8802
8795
|
|
|
@@ -8808,7 +8801,7 @@ class EndpointConfigurationShadowProductionVariantServerlessConfigArgs:
|
|
|
8808
8801
|
@pulumi.getter(name="provisionedConcurrency")
|
|
8809
8802
|
def provisioned_concurrency(self) -> Optional[pulumi.Input[_builtins.int]]:
|
|
8810
8803
|
"""
|
|
8811
|
-
|
|
8804
|
+
Amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
|
|
8812
8805
|
"""
|
|
8813
8806
|
return pulumi.get(self, "provisioned_concurrency")
|
|
8814
8807
|
|