@aws-sdk/client-sagemaker 3.1054.0 → 3.1055.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist-cjs/index.js +6 -0
- package/dist-cjs/schemas/schemas_0.js +80 -49
- package/dist-es/models/enums.js +5 -0
- package/dist-es/schemas/schemas_0.js +44 -13
- package/dist-types/commands/CreateAIRecommendationJobCommand.d.ts +1 -1
- package/dist-types/commands/CreateClusterCommand.d.ts +9 -0
- package/dist-types/commands/CreateOptimizationJobCommand.d.ts +1 -2
- package/dist-types/commands/DescribeAIRecommendationJobCommand.d.ts +3 -3
- package/dist-types/commands/DescribeClusterCommand.d.ts +14 -0
- package/dist-types/commands/DescribeTrainingPlanCommand.d.ts +1 -0
- package/dist-types/commands/ListModelBiasJobDefinitionsCommand.d.ts +1 -1
- package/dist-types/commands/ListModelCardExportJobsCommand.d.ts +1 -2
- package/dist-types/commands/ListTrainingPlansCommand.d.ts +1 -0
- package/dist-types/commands/UpdateClusterCommand.d.ts +9 -0
- package/dist-types/commands/UpdateTrialCommand.d.ts +2 -1
- package/dist-types/commands/UpdateTrialComponentCommand.d.ts +1 -1
- package/dist-types/commands/UpdateUserProfileCommand.d.ts +1 -2
- package/dist-types/models/enums.d.ts +13 -0
- package/dist-types/models/models_0.d.ts +65 -100
- package/dist-types/models/models_1.d.ts +106 -174
- package/dist-types/models/models_2.d.ts +180 -102
- package/dist-types/models/models_3.d.ts +108 -160
- package/dist-types/models/models_4.d.ts +165 -104
- package/dist-types/models/models_5.d.ts +102 -1
- package/dist-types/schemas/schemas_0.d.ts +4 -0
- package/dist-types/ts3.4/commands/CreateOptimizationJobCommand.d.ts +4 -2
- package/dist-types/ts3.4/commands/ListModelBiasJobDefinitionsCommand.d.ts +1 -1
- package/dist-types/ts3.4/commands/ListModelCardExportJobsCommand.d.ts +4 -2
- package/dist-types/ts3.4/commands/UpdateTrialCommand.d.ts +2 -1
- package/dist-types/ts3.4/commands/UpdateTrialComponentCommand.d.ts +1 -1
- package/dist-types/ts3.4/commands/UpdateUserProfileCommand.d.ts +4 -2
- package/dist-types/ts3.4/models/enums.d.ts +7 -0
- package/dist-types/ts3.4/models/models_0.d.ts +17 -26
- package/dist-types/ts3.4/models/models_1.d.ts +30 -76
- package/dist-types/ts3.4/models/models_2.d.ts +85 -28
- package/dist-types/ts3.4/models/models_3.d.ts +35 -43
- package/dist-types/ts3.4/models/models_4.d.ts +43 -26
- package/dist-types/ts3.4/models/models_5.d.ts +28 -0
- package/dist-types/ts3.4/schemas/schemas_0.d.ts +4 -0
- package/package.json +1 -1
|
@@ -1,6 +1,105 @@
|
|
|
1
1
|
import { AutomaticJsonStringConversion as __AutomaticJsonStringConversion } from "@smithy/core/serde";
|
|
2
|
-
import type { _InstanceType, AccountDefaultStatus, ActionStatus, ActivationState, AppInstanceType, AppNetworkAccessType, AppSecurityGroupManagement, AppType, AuthMode, AutoMountHomeEFS, AvailabilityZoneBalanceEnforcementMode, AwsManagedHumanLoopRequestSource, CapacityReservationPreference, ClusterNodeProvisioningMode, ClusterNodeRecovery, CollectionType, CompleteOnConvergence, ContainerMode, ContentClassifier, DeviceSubsetType, DirectInternetAccess, EdgePresetDeploymentType, ExecutionRoleIdentityConfig, ExecutionRoleSessionNameMode, FailureHandlingPolicy, FairShare, FeatureStatus, FeatureType, FlatInvocations, Framework, HomeEfsFileSystemCreation, HubContentType, HyperParameterScalingType, HyperParameterTuningAllocationStrategy, HyperParameterTuningJobObjectiveType, HyperParameterTuningJobStrategyType, HyperParameterTuningJobWarmStartType, IdleResourceSharing, InferenceComponentPlacementStrategy, InferenceExecutionMode, InferenceExperimentType, IPAddressType, JobType, ManagedInstanceScalingScaleInStrategy, ManagedInstanceScalingStatus, ManagedStorageType, MetricPublishFrequencyInSeconds, MlTools, ModelApprovalStatus, ModelCacheSetting, ModelCardStatus, ModelInfrastructureType, ModelPackageRegistrationType, ModelRegistrationMode, ModelSpeculativeDecodingS3DataType, ModelSpeculativeDecodingTechnique, MonitoringProblemType, MonitoringType, NotebookInstanceAcceleratorType, NotebookOutputOption,
|
|
3
|
-
import type { ActionSource, AdditionalInferenceSpecificationDefinition, AdditionalModelDataSource, AdditionalS3DataSource, AIBenchmarkNetworkConfig, AIBenchmarkOutputConfig, AIBenchmarkTarget, AIDatasetConfig, AIModelSource, AIRecommendationComputeSpec, AIRecommendationInferenceSpecification, AIRecommendationOutputConfig, AIRecommendationPerformanceTarget, AIWorkloadConfigs, AlgorithmValidationSpecification, AmazonQSettings, AnnotationConsolidationConfig, AppLifecycleManagement, ArtifactSource, AsyncInferenceConfig, AuthorizedUrl, AutoMLChannel, AutoMLComputeConfig, AutoMLDataSplitConfig, AutoMLJobChannel, AutoMLJobConfig, AutoMLJobObjective, AutoMLOutputDataConfig, AutoMLProblemTypeConfig, AutoMLSecurityConfig, AutoParameter, AutoRollbackConfig, Autotune, BatchTransformInput, BestObjectiveNotImproving, Bias, BlueGreenUpdatePolicy, CanvasAppSettings, CapacitySize, CaptureContentTypeHeader, CaptureOption, CategoricalParameter, CategoricalParameterRange, CategoricalParameterRangeSpecification, Channel, ChannelSpecification, CheckpointConfig, ClarifyExplainerConfig, ClusterAutoScalingConfig, ClusterInstanceGroupSpecification, ClusterOrchestrator, ClusterRestrictedInstanceGroupSpecification, ClusterTieredStorageConfig, CodeEditorAppImageConfig, CodeEditorAppSettings, CodeRepository, CollectionConfig, ComputeQuotaConfig, ComputeQuotaTarget, CustomImage, GitConfig,
|
|
2
|
+
import type { _InstanceType, AccountDefaultStatus, ActionStatus, ActivationState, AppInstanceType, AppNetworkAccessType, AppSecurityGroupManagement, AppType, AuthMode, AutoMountHomeEFS, AvailabilityZoneBalanceEnforcementMode, AwsManagedHumanLoopRequestSource, CapacityReservationPreference, ClusterNodeProvisioningMode, ClusterNodeRecovery, CollectionType, CompleteOnConvergence, ConditionOutcome, ContainerMode, ContentClassifier, DeviceSubsetType, DirectInternetAccess, EdgePresetDeploymentType, ExecutionRoleIdentityConfig, ExecutionRoleSessionNameMode, FailureHandlingPolicy, FairShare, FeatureStatus, FeatureType, FlatInvocations, Framework, HomeEfsFileSystemCreation, HubContentType, HyperParameterScalingType, HyperParameterTuningAllocationStrategy, HyperParameterTuningJobObjectiveType, HyperParameterTuningJobStrategyType, HyperParameterTuningJobWarmStartType, IdleResourceSharing, InferenceComponentPlacementStrategy, InferenceExecutionMode, InferenceExperimentType, IPAddressType, JobType, ManagedInstanceScalingScaleInStrategy, ManagedInstanceScalingStatus, ManagedStorageType, MetricPublishFrequencyInSeconds, MlTools, ModelApprovalStatus, ModelCacheSetting, ModelCardStatus, ModelInfrastructureType, ModelPackageRegistrationType, ModelRegistrationMode, ModelSpeculativeDecodingS3DataType, ModelSpeculativeDecodingTechnique, MonitoringProblemType, MonitoringType, NotebookInstanceAcceleratorType, NotebookOutputOption, ParameterType, ProblemType, ProcessingInstanceType, ProcessingS3DataDistributionType, ProcessingS3InputMode, ProcessingS3UploadMode, Processor, ProductionVariantAcceleratorType, ProductionVariantInferenceAmiVersion, ProductionVariantInstanceType, RecommendationJobSupportedEndpointType, RecommendationJobType, RepositoryAccessMode, RootAccess, RoutingStrategy, RStudioServerProAccessStatus, RStudioServerProUserGroup, SageMakerImageName, SchedulerResourceStatus, SkipModelValidation, StorageType, StudioWebPortal, TableFormat, TagPropagation, TargetDevice, 
TargetPlatformAccelerator, TargetPlatformArch, TargetPlatformOs, ThroughputMode, TrackingServerSize, TrafficType, TrainingInputMode, TrainingInstanceType, TrainingJobEarlyStoppingType, TtlDurationUnit, VendorGuidance } from "./enums";
|
|
3
|
+
import type { ActionSource, AdditionalInferenceSpecificationDefinition, AdditionalModelDataSource, AdditionalS3DataSource, AIBenchmarkNetworkConfig, AIBenchmarkOutputConfig, AIBenchmarkTarget, AIDatasetConfig, AIModelSource, AIRecommendationComputeSpec, AIRecommendationInferenceSpecification, AIRecommendationOutputConfig, AIRecommendationPerformanceTarget, AIWorkloadConfigs, AlgorithmValidationSpecification, AmazonQSettings, AnnotationConsolidationConfig, AppLifecycleManagement, ArtifactSource, AsyncInferenceConfig, AuthorizedUrl, AutoMLChannel, AutoMLComputeConfig, AutoMLDataSplitConfig, AutoMLJobChannel, AutoMLJobConfig, AutoMLJobObjective, AutoMLOutputDataConfig, AutoMLProblemTypeConfig, AutoMLSecurityConfig, AutoParameter, AutoRollbackConfig, Autotune, BatchTransformInput, BestObjectiveNotImproving, Bias, BlueGreenUpdatePolicy, CanvasAppSettings, CapacitySize, CaptureContentTypeHeader, CaptureOption, CategoricalParameter, CategoricalParameterRange, CategoricalParameterRangeSpecification, Channel, ChannelSpecification, CheckpointConfig, ClarifyExplainerConfig, ClusterAutoScalingConfig, ClusterInstanceGroupSpecification, ClusterOrchestrator, ClusterRestrictedInstanceGroupsConfig, ClusterRestrictedInstanceGroupSpecification, ClusterTieredStorageConfig, CodeEditorAppImageConfig, CodeEditorAppSettings, CodeRepository, CollectionConfig, ComputeQuotaConfig, ComputeQuotaTarget, CustomImage, GitConfig, InferenceSpecification, JupyterLabAppImageConfig, KernelGatewayImageConfig, MetricDefinition, MetricsSource, ModelDataSource, OutputDataConfig, ResourceConfig, ResourceSpec, StoppingCondition, Tag, TransformJobDefinition, VpcConfig } from "./models_0";
|
|
4
|
+
/**
|
|
5
|
+
* <p>Summary of the compute allocation definition.</p>
|
|
6
|
+
* @public
|
|
7
|
+
*/
|
|
8
|
+
export interface ComputeQuotaSummary {
|
|
9
|
+
/**
|
|
10
|
+
* <p>ARN of the compute allocation definition.</p>
|
|
11
|
+
* @public
|
|
12
|
+
*/
|
|
13
|
+
ComputeQuotaArn: string | undefined;
|
|
14
|
+
/**
|
|
15
|
+
* <p>ID of the compute allocation definition.</p>
|
|
16
|
+
* @public
|
|
17
|
+
*/
|
|
18
|
+
ComputeQuotaId: string | undefined;
|
|
19
|
+
/**
|
|
20
|
+
* <p>Name of the compute allocation definition.</p>
|
|
21
|
+
* @public
|
|
22
|
+
*/
|
|
23
|
+
Name: string | undefined;
|
|
24
|
+
/**
|
|
25
|
+
* <p>Version of the compute allocation definition.</p>
|
|
26
|
+
* @public
|
|
27
|
+
*/
|
|
28
|
+
ComputeQuotaVersion?: number | undefined;
|
|
29
|
+
/**
|
|
30
|
+
* <p>Status of the compute allocation definition.</p>
|
|
31
|
+
* @public
|
|
32
|
+
*/
|
|
33
|
+
Status: SchedulerResourceStatus | undefined;
|
|
34
|
+
/**
|
|
35
|
+
* <p>ARN of the cluster.</p>
|
|
36
|
+
* @public
|
|
37
|
+
*/
|
|
38
|
+
ClusterArn?: string | undefined;
|
|
39
|
+
/**
|
|
40
|
+
* <p>Configuration of the compute allocation definition. This includes the resource sharing option, and the setting to preempt low priority tasks.</p>
|
|
41
|
+
* @public
|
|
42
|
+
*/
|
|
43
|
+
ComputeQuotaConfig?: ComputeQuotaConfig | undefined;
|
|
44
|
+
/**
|
|
45
|
+
* <p>The target entity to allocate compute resources to.</p>
|
|
46
|
+
* @public
|
|
47
|
+
*/
|
|
48
|
+
ComputeQuotaTarget: ComputeQuotaTarget | undefined;
|
|
49
|
+
/**
|
|
50
|
+
* <p>The state of the compute allocation being described. Use to enable or disable compute allocation.</p> <p>Default is <code>Enabled</code>.</p>
|
|
51
|
+
* @public
|
|
52
|
+
*/
|
|
53
|
+
ActivationState?: ActivationState | undefined;
|
|
54
|
+
/**
|
|
55
|
+
* <p>Creation time of the compute allocation definition.</p>
|
|
56
|
+
* @public
|
|
57
|
+
*/
|
|
58
|
+
CreationTime: Date | undefined;
|
|
59
|
+
/**
|
|
60
|
+
* <p>Last modified time of the compute allocation definition.</p>
|
|
61
|
+
* @public
|
|
62
|
+
*/
|
|
63
|
+
LastModifiedTime?: Date | undefined;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* <p>Metadata for a Condition step.</p>
|
|
67
|
+
* @public
|
|
68
|
+
*/
|
|
69
|
+
export interface ConditionStepMetadata {
|
|
70
|
+
/**
|
|
71
|
+
* <p>The outcome of the Condition step evaluation.</p>
|
|
72
|
+
* @public
|
|
73
|
+
*/
|
|
74
|
+
Outcome?: ConditionOutcome | undefined;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* <p>Specifies an authentication configuration for the private docker registry where your model image is hosted. Specify a value for this property only if you specified <code>Vpc</code> as the value for the <code>RepositoryAccessMode</code> field of the <code>ImageConfig</code> object that you passed to a call to <code>CreateModel</code> and the private Docker registry where the model image is hosted requires authentication.</p>
|
|
78
|
+
* @public
|
|
79
|
+
*/
|
|
80
|
+
export interface RepositoryAuthConfig {
|
|
81
|
+
/**
|
|
82
|
+
* <p>The Amazon Resource Name (ARN) of an Amazon Web Services Lambda function that provides credentials to authenticate to the private Docker registry where your model image is hosted. For information about how to create an Amazon Web Services Lambda function, see <a href="https://docs.aws.amazon.com/lambda/latest/dg/getting-started-create-function.html">Create a Lambda function with the console</a> in the <i>Amazon Web Services Lambda Developer Guide</i>.</p>
|
|
83
|
+
* @public
|
|
84
|
+
*/
|
|
85
|
+
RepositoryCredentialsProviderArn: string | undefined;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* <p>Specifies whether the model container is in Amazon ECR or a private Docker registry accessible from your Amazon Virtual Private Cloud (VPC).</p>
|
|
89
|
+
* @public
|
|
90
|
+
*/
|
|
91
|
+
export interface ImageConfig {
|
|
92
|
+
/**
|
|
93
|
+
* <p>Set this to one of the following values:</p> <ul> <li> <p> <code>Platform</code> - The model image is hosted in Amazon ECR.</p> </li> <li> <p> <code>Vpc</code> - The model image is hosted in a private Docker registry in your VPC.</p> </li> </ul>
|
|
94
|
+
* @public
|
|
95
|
+
*/
|
|
96
|
+
RepositoryAccessMode: RepositoryAccessMode | undefined;
|
|
97
|
+
/**
|
|
98
|
+
* <p>(Optional) Specifies an authentication configuration for the private docker registry where your model image is hosted. Specify a value for this property only if you specified <code>Vpc</code> as the value for the <code>RepositoryAccessMode</code> field, and the private Docker registry where the model image is hosted requires authentication.</p>
|
|
99
|
+
* @public
|
|
100
|
+
*/
|
|
101
|
+
RepositoryAuthConfig?: RepositoryAuthConfig | undefined;
|
|
102
|
+
}
|
|
4
103
|
/**
|
|
5
104
|
* <p>Specifies additional configuration for hosting multi-model endpoints.</p>
|
|
6
105
|
* @public
|
|
@@ -919,6 +1018,11 @@ export interface CreateClusterRequest {
|
|
|
919
1018
|
* @public
|
|
920
1019
|
*/
|
|
921
1020
|
RestrictedInstanceGroups?: ClusterRestrictedInstanceGroupSpecification[] | undefined;
|
|
1021
|
+
/**
|
|
1022
|
+
* <p>The configuration for the restricted instance groups (RIG) in the SageMaker HyperPod cluster.</p>
|
|
1023
|
+
* @public
|
|
1024
|
+
*/
|
|
1025
|
+
RestrictedInstanceGroupsConfig?: ClusterRestrictedInstanceGroupsConfig | undefined;
|
|
922
1026
|
/**
|
|
923
1027
|
* <p>Specifies the Amazon Virtual Private Cloud (VPC) that is associated with the Amazon SageMaker HyperPod cluster. You can control access to and from your resources by configuring your VPC. For more information, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/infrastructure-give-access.html">Give SageMaker access to resources in your Amazon VPC</a>.</p> <note> <p>When your Amazon VPC and subnets support IPv6, network communications differ based on the cluster orchestration platform:</p> <ul> <li> <p>Slurm-orchestrated clusters automatically configure nodes with dual IPv6 and IPv4 addresses, allowing immediate IPv6 network communications.</p> </li> <li> <p>In Amazon EKS-orchestrated clusters, nodes receive dual-stack addressing, but pods can only use IPv6 when the Amazon EKS cluster is explicitly IPv6-enabled. For information about deploying an IPv6 Amazon EKS cluster, see <a href="https://docs.aws.amazon.com/eks/latest/userguide/deploy-ipv6-cluster.html#_deploy_an_ipv6_cluster_with_eksctl">Amazon EKS IPv6 Cluster Deployment</a>.</p> </li> </ul> <p>Additional resources for IPv6 configuration:</p> <ul> <li> <p>For information about adding IPv6 support to your VPC, see <a href="https://docs.aws.amazon.com/vpc/latest/userguide/vpc-migrate-ipv6.html">IPv6 Support for VPC</a>.</p> </li> <li> <p>For information about creating a new IPv6-compatible VPC, see <a href="https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html">Amazon VPC Creation Guide</a>.</p> </li> <li> <p>To configure SageMaker HyperPod with a custom Amazon VPC, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-hyperpod-prerequisites.html#sagemaker-hyperpod-prerequisites-optional-vpc">Custom Amazon VPC Setup for SageMaker HyperPod</a>.</p> </li> </ul> </note>
|
|
924
1028
|
* @public
|
|
@@ -7063,175 +7167,3 @@ export interface ModelSpeculativeDecodingConfig {
|
|
|
7063
7167
|
*/
|
|
7064
7168
|
TrainingDataSource?: ModelSpeculativeDecodingTrainingDataSource | undefined;
|
|
7065
7169
|
}
|
|
7066
|
-
/**
|
|
7067
|
-
* <p>Settings for an optimization technique that you apply with a model optimization job.</p>
|
|
7068
|
-
* @public
|
|
7069
|
-
*/
|
|
7070
|
-
export type OptimizationConfig = OptimizationConfig.ModelCompilationConfigMember | OptimizationConfig.ModelQuantizationConfigMember | OptimizationConfig.ModelShardingConfigMember | OptimizationConfig.ModelSpeculativeDecodingConfigMember | OptimizationConfig.$UnknownMember;
|
|
7071
|
-
/**
|
|
7072
|
-
* @public
|
|
7073
|
-
*/
|
|
7074
|
-
export declare namespace OptimizationConfig {
|
|
7075
|
-
/**
|
|
7076
|
-
* <p>Settings for the model quantization technique that's applied by a model optimization job.</p>
|
|
7077
|
-
* @public
|
|
7078
|
-
*/
|
|
7079
|
-
interface ModelQuantizationConfigMember {
|
|
7080
|
-
ModelQuantizationConfig: ModelQuantizationConfig;
|
|
7081
|
-
ModelCompilationConfig?: never;
|
|
7082
|
-
ModelShardingConfig?: never;
|
|
7083
|
-
ModelSpeculativeDecodingConfig?: never;
|
|
7084
|
-
$unknown?: never;
|
|
7085
|
-
}
|
|
7086
|
-
/**
|
|
7087
|
-
* <p>Settings for the model compilation technique that's applied by a model optimization job.</p>
|
|
7088
|
-
* @public
|
|
7089
|
-
*/
|
|
7090
|
-
interface ModelCompilationConfigMember {
|
|
7091
|
-
ModelQuantizationConfig?: never;
|
|
7092
|
-
ModelCompilationConfig: ModelCompilationConfig;
|
|
7093
|
-
ModelShardingConfig?: never;
|
|
7094
|
-
ModelSpeculativeDecodingConfig?: never;
|
|
7095
|
-
$unknown?: never;
|
|
7096
|
-
}
|
|
7097
|
-
/**
|
|
7098
|
-
* <p>Settings for the model sharding technique that's applied by a model optimization job.</p>
|
|
7099
|
-
* @public
|
|
7100
|
-
*/
|
|
7101
|
-
interface ModelShardingConfigMember {
|
|
7102
|
-
ModelQuantizationConfig?: never;
|
|
7103
|
-
ModelCompilationConfig?: never;
|
|
7104
|
-
ModelShardingConfig: ModelShardingConfig;
|
|
7105
|
-
ModelSpeculativeDecodingConfig?: never;
|
|
7106
|
-
$unknown?: never;
|
|
7107
|
-
}
|
|
7108
|
-
/**
|
|
7109
|
-
* <p>Settings for the model speculative decoding technique that's applied by a model optimization job.</p>
|
|
7110
|
-
* @public
|
|
7111
|
-
*/
|
|
7112
|
-
interface ModelSpeculativeDecodingConfigMember {
|
|
7113
|
-
ModelQuantizationConfig?: never;
|
|
7114
|
-
ModelCompilationConfig?: never;
|
|
7115
|
-
ModelShardingConfig?: never;
|
|
7116
|
-
ModelSpeculativeDecodingConfig: ModelSpeculativeDecodingConfig;
|
|
7117
|
-
$unknown?: never;
|
|
7118
|
-
}
|
|
7119
|
-
/**
|
|
7120
|
-
* @public
|
|
7121
|
-
*/
|
|
7122
|
-
interface $UnknownMember {
|
|
7123
|
-
ModelQuantizationConfig?: never;
|
|
7124
|
-
ModelCompilationConfig?: never;
|
|
7125
|
-
ModelShardingConfig?: never;
|
|
7126
|
-
ModelSpeculativeDecodingConfig?: never;
|
|
7127
|
-
$unknown: [string, any];
|
|
7128
|
-
}
|
|
7129
|
-
/**
|
|
7130
|
-
* @deprecated unused in schema-serde mode.
|
|
7131
|
-
*
|
|
7132
|
-
*/
|
|
7133
|
-
interface Visitor<T> {
|
|
7134
|
-
ModelQuantizationConfig: (value: ModelQuantizationConfig) => T;
|
|
7135
|
-
ModelCompilationConfig: (value: ModelCompilationConfig) => T;
|
|
7136
|
-
ModelShardingConfig: (value: ModelShardingConfig) => T;
|
|
7137
|
-
ModelSpeculativeDecodingConfig: (value: ModelSpeculativeDecodingConfig) => T;
|
|
7138
|
-
_: (name: string, value: any) => T;
|
|
7139
|
-
}
|
|
7140
|
-
}
|
|
7141
|
-
/**
|
|
7142
|
-
* <p>Details for where to store the optimized model that you create with the optimization job.</p>
|
|
7143
|
-
* @public
|
|
7144
|
-
*/
|
|
7145
|
-
export interface OptimizationJobOutputConfig {
|
|
7146
|
-
/**
|
|
7147
|
-
* <p>The Amazon Resource Name (ARN) of a key in Amazon Web Services KMS. SageMaker uses the key to encrypt the artifacts of the optimized model when SageMaker uploads the model to Amazon S3.</p>
|
|
7148
|
-
* @public
|
|
7149
|
-
*/
|
|
7150
|
-
KmsKeyId?: string | undefined;
|
|
7151
|
-
/**
|
|
7152
|
-
* <p>The Amazon S3 URI for where to store the optimized model that you create with an optimization job.</p>
|
|
7153
|
-
* @public
|
|
7154
|
-
*/
|
|
7155
|
-
S3OutputLocation: string | undefined;
|
|
7156
|
-
/**
|
|
7157
|
-
* <p>The name of a SageMaker model to use as the output destination for an optimization job.</p>
|
|
7158
|
-
* @public
|
|
7159
|
-
*/
|
|
7160
|
-
SageMakerModel?: OptimizationSageMakerModel | undefined;
|
|
7161
|
-
}
|
|
7162
|
-
/**
|
|
7163
|
-
* <p>A VPC in Amazon VPC that's accessible to an optimized model that you create with an optimization job. You can control access to and from your resources by configuring a VPC. For more information, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/infrastructure-give-access.html">Give SageMaker Access to Resources in your Amazon VPC</a>. </p>
|
|
7164
|
-
* @public
|
|
7165
|
-
*/
|
|
7166
|
-
export interface OptimizationVpcConfig {
|
|
7167
|
-
/**
|
|
7168
|
-
* <p>The VPC security group IDs, in the form <code>sg-xxxxxxxx</code>. Specify the security groups for the VPC that is specified in the <code>Subnets</code> field.</p>
|
|
7169
|
-
* @public
|
|
7170
|
-
*/
|
|
7171
|
-
SecurityGroupIds: string[] | undefined;
|
|
7172
|
-
/**
|
|
7173
|
-
* <p>The ID of the subnets in the VPC to which you want to connect your optimized model.</p>
|
|
7174
|
-
* @public
|
|
7175
|
-
*/
|
|
7176
|
-
Subnets: string[] | undefined;
|
|
7177
|
-
}
|
|
7178
|
-
/**
|
|
7179
|
-
* @public
|
|
7180
|
-
*/
|
|
7181
|
-
export interface CreateOptimizationJobRequest {
|
|
7182
|
-
/**
|
|
7183
|
-
* <p>A custom name for the new optimization job.</p>
|
|
7184
|
-
* @public
|
|
7185
|
-
*/
|
|
7186
|
-
OptimizationJobName: string | undefined;
|
|
7187
|
-
/**
|
|
7188
|
-
* <p>The Amazon Resource Name (ARN) of an IAM role that enables Amazon SageMaker AI to perform tasks on your behalf. </p> <p>During model optimization, Amazon SageMaker AI needs your permission to:</p> <ul> <li> <p>Read input data from an S3 bucket</p> </li> <li> <p>Write model artifacts to an S3 bucket</p> </li> <li> <p>Write logs to Amazon CloudWatch Logs</p> </li> <li> <p>Publish metrics to Amazon CloudWatch</p> </li> </ul> <p>You grant permissions for all of these tasks to an IAM role. To pass this role to Amazon SageMaker AI, the caller of this API must have the <code>iam:PassRole</code> permission. For more information, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html">Amazon SageMaker AI Roles.</a> </p>
|
|
7189
|
-
* @public
|
|
7190
|
-
*/
|
|
7191
|
-
RoleArn: string | undefined;
|
|
7192
|
-
/**
|
|
7193
|
-
* <p>The location of the source model to optimize with an optimization job.</p>
|
|
7194
|
-
* @public
|
|
7195
|
-
*/
|
|
7196
|
-
ModelSource: OptimizationJobModelSource | undefined;
|
|
7197
|
-
/**
|
|
7198
|
-
* <p>The type of instance that hosts the optimized model that you create with the optimization job.</p>
|
|
7199
|
-
* @public
|
|
7200
|
-
*/
|
|
7201
|
-
DeploymentInstanceType: OptimizationJobDeploymentInstanceType | undefined;
|
|
7202
|
-
/**
|
|
7203
|
-
* <p>The maximum number of instances to use for the optimization job.</p>
|
|
7204
|
-
* @public
|
|
7205
|
-
*/
|
|
7206
|
-
MaxInstanceCount?: number | undefined;
|
|
7207
|
-
/**
|
|
7208
|
-
* <p>The environment variables to set in the model container.</p>
|
|
7209
|
-
* @public
|
|
7210
|
-
*/
|
|
7211
|
-
OptimizationEnvironment?: Record<string, string> | undefined;
|
|
7212
|
-
/**
|
|
7213
|
-
* <p>Settings for each of the optimization techniques that the job applies.</p>
|
|
7214
|
-
* @public
|
|
7215
|
-
*/
|
|
7216
|
-
OptimizationConfigs: OptimizationConfig[] | undefined;
|
|
7217
|
-
/**
|
|
7218
|
-
* <p>Details for where to store the optimized model that you create with the optimization job.</p>
|
|
7219
|
-
* @public
|
|
7220
|
-
*/
|
|
7221
|
-
OutputConfig: OptimizationJobOutputConfig | undefined;
|
|
7222
|
-
/**
|
|
7223
|
-
* <p>Specifies a limit to how long a job can run. When the job reaches the time limit, SageMaker ends the job. Use this API to cap costs.</p> <p>To stop a training job, SageMaker sends the algorithm the <code>SIGTERM</code> signal, which delays job termination for 120 seconds. Algorithms can use this 120-second window to save the model artifacts, so the results of training are not lost. </p> <p>The training algorithms provided by SageMaker automatically save the intermediate results of a model training job when possible. This attempt to save artifacts is only a best effort case as model might not be in a state from which it can be saved. For example, if training has just started, the model might not be ready to save. When saved, this intermediate data is a valid model artifact. You can use it to create a model with <code>CreateModel</code>.</p> <note> <p>The Neural Topic Model (NTM) currently does not support saving intermediate model artifacts. When training NTMs, make sure that the maximum runtime is sufficient for the training job to complete.</p> </note>
|
|
7224
|
-
* @public
|
|
7225
|
-
*/
|
|
7226
|
-
StoppingCondition: StoppingCondition | undefined;
|
|
7227
|
-
/**
|
|
7228
|
-
* <p>A list of key-value pairs associated with the optimization job. For more information, see <a href="https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html">Tagging Amazon Web Services resources</a> in the <i>Amazon Web Services General Reference Guide</i>.</p>
|
|
7229
|
-
* @public
|
|
7230
|
-
*/
|
|
7231
|
-
Tags?: Tag[] | undefined;
|
|
7232
|
-
/**
|
|
7233
|
-
* <p>A VPC in Amazon VPC that your optimized model has access to.</p>
|
|
7234
|
-
* @public
|
|
7235
|
-
*/
|
|
7236
|
-
VpcConfig?: OptimizationVpcConfig | undefined;
|
|
7237
|
-
}
|
|
@@ -1,6 +1,178 @@
|
|
|
1
|
-
import type { ActionStatus, ActivationState, AIBenchmarkJobStatus, AIRecommendationJobStatus, AlgorithmStatus, AppNetworkAccessType, AppSecurityGroupManagement, AppStatus, AppType, AuthMode, AutoMLJobSecondaryStatus, AutoMLJobStatus, AutoMLProblemTypeConfigName, BatchStrategy, CapacityReservationPreference, CaptureStatus, ClusterNodeProvisioningMode, ClusterNodeRecovery, ClusterStatus, CompilationJobStatus, CustomizationTechnique, DataDistributionType, DeepHealthCheckType, DomainStatus, EdgePackagingJobStatus, EdgePresetDeploymentStatus, EdgePresetDeploymentType, EnabledOrDisabled, EndpointStatus, EvaluationType, FeatureGroupStatus, FeatureStatus, FeatureType, FlowDefinitionStatus, HomeEfsFileSystemCreation, HubContentStatus, HubContentSupportStatus, HubContentType, HubStatus, HumanTaskUiStatus, HyperParameterTuningJobObjectiveType, HyperParameterTuningJobStatus, ImageStatus, ImageVersionStatus, InferenceComponentCapacitySizeType, InputMode, JobType, JoinSource, LastUpdateStatusValue, ObjectiveStatus, OfflineStoreStatusValue, PartnerAppAuthType, PartnerAppType, Peft, ProblemType, ProcessingInstanceType, ProcessingS3CompressionType, ProcessingS3DataDistributionType, ProcessingS3DataType, ProcessingS3InputMode, ProcessingS3UploadMode, Processor, ProductionVariantAcceleratorType, ProductionVariantInstanceType, RecommendationStatus, RedshiftResultCompressionType, RedshiftResultFormat, RetentionType, RuleEvaluationStatus, SchedulerConfigComponent, SchedulerResourceStatus, ServerlessJobType, SharingType, StageStatus, Statistic, StudioLifecycleConfigAppType, TagPropagation, ThroughputMode, TrainingJobStatus, TrialComponentPrimaryStatus, VariantStatus, VendorGuidance, WorkforceIpAddressType } from "./enums";
|
|
2
|
-
import type { ActionSource, AIBenchmarkNetworkConfig, AIBenchmarkOutputResult, AIBenchmarkTarget, AIDatasetConfig, AIModelSource, AIRecommendation, AIRecommendationComputeSpec, AIRecommendationInferenceSpecification, AIRecommendationOutputResult, AIRecommendationPerformanceTarget, AIWorkloadConfigs, AlgorithmSpecification, AlgorithmStatusDetails, AlgorithmValidationSpecification, AppSpecification, ArtifactSource, AsyncInferenceConfig, AthenaDatasetDefinition, AutoMLCandidate, AutoMLChannel, AutoMLComputeConfig, AutoMLDataSplitConfig, AutoMLJobArtifacts, AutoMLJobChannel, AutoMLJobCompletionCriteria, AutoMLJobConfig, AutoMLJobObjective, AutoMLOutputDataConfig, AutoMLPartialFailureReason, AutoMLProblemTypeConfig, AutoMLResolvedAttributes, AutoMLSecurityConfig, AutoRollbackConfig, Autotune, BatchDataCaptureConfig, CfnCreateTemplateProvider, Channel, CheckpointConfig, ClusterAutoScalingConfigOutput, ClusterEventDetail, ClusterInstanceGroupDetails, ClusterNodeDetails, ClusterOrchestrator, ClusterRestrictedInstanceGroupDetails, ClusterTieredStorageConfig, CodeEditorAppImageConfig, CodeRepository, CognitoConfig, CognitoMemberDefinition, CollectionConfiguration, ComputeQuotaConfig, ComputeQuotaTarget, GitConfig, InferenceSpecification, JupyterLabAppImageConfig, KernelGatewayImageConfig, OutputDataConfig, ResourceConfig, ResourceSpec, StoppingCondition, Tag, TransformInput, TransformOutput, TransformResources, UserContext, VpcConfig } from "./models_0";
|
|
3
|
-
import type { ContextSource, DataCaptureConfig, DataQualityAppSpecification, DataQualityBaselineConfig, DataQualityJobInput, DefaultSpaceSettings, DeploymentConfig, DeviceSelectionConfig, DomainSettings, EdgeDeploymentConfig, EdgeDeploymentModelConfig, EdgeOutputConfig, ExplainerConfig, FeatureDefinition, FlowDefinitionOutputConfig, HubS3StorageConfig, HumanLoopActivationConfig, HumanLoopConfig, HumanLoopRequestSource, HyperParameterTrainingJobDefinition, HyperParameterTuningJobConfig, HyperParameterTuningJobWarmStartConfig,
|
|
1
|
+
import type { ActionStatus, ActivationState, AIBenchmarkJobStatus, AIRecommendationJobStatus, AlgorithmStatus, AppNetworkAccessType, AppSecurityGroupManagement, AppStatus, AppType, AuthMode, AutoMLJobSecondaryStatus, AutoMLJobStatus, AutoMLProblemTypeConfigName, BatchStrategy, CapacityReservationPreference, CaptureStatus, ClusterNodeProvisioningMode, ClusterNodeRecovery, ClusterStatus, CompilationJobStatus, CustomizationTechnique, DataDistributionType, DeepHealthCheckType, DomainStatus, EdgePackagingJobStatus, EdgePresetDeploymentStatus, EdgePresetDeploymentType, EnabledOrDisabled, EndpointStatus, EvaluationType, FeatureGroupStatus, FeatureStatus, FeatureType, FlowDefinitionStatus, HomeEfsFileSystemCreation, HubContentStatus, HubContentSupportStatus, HubContentType, HubStatus, HumanTaskUiStatus, HyperParameterTuningJobObjectiveType, HyperParameterTuningJobStatus, ImageStatus, ImageVersionStatus, InferenceComponentCapacitySizeType, InputMode, JobType, JoinSource, LastUpdateStatusValue, ObjectiveStatus, OfflineStoreStatusValue, OptimizationJobDeploymentInstanceType, PartnerAppAuthType, PartnerAppType, Peft, ProblemType, ProcessingInstanceType, ProcessingS3CompressionType, ProcessingS3DataDistributionType, ProcessingS3DataType, ProcessingS3InputMode, ProcessingS3UploadMode, Processor, ProductionVariantAcceleratorType, ProductionVariantInstanceType, RecommendationStatus, RedshiftResultCompressionType, RedshiftResultFormat, RetentionType, RuleEvaluationStatus, SchedulerConfigComponent, SchedulerResourceStatus, ServerlessJobType, SharingType, StageStatus, Statistic, StudioLifecycleConfigAppType, TagPropagation, ThroughputMode, TrainingJobStatus, TrialComponentPrimaryStatus, VariantStatus, VendorGuidance, WorkforceIpAddressType } from "./enums";
|
|
2
|
+
import type { ActionSource, AIBenchmarkNetworkConfig, AIBenchmarkOutputResult, AIBenchmarkTarget, AIDatasetConfig, AIModelSource, AIRecommendation, AIRecommendationComputeSpec, AIRecommendationInferenceSpecification, AIRecommendationOutputResult, AIRecommendationPerformanceTarget, AIWorkloadConfigs, AlgorithmSpecification, AlgorithmStatusDetails, AlgorithmValidationSpecification, AppSpecification, ArtifactSource, AsyncInferenceConfig, AthenaDatasetDefinition, AutoMLCandidate, AutoMLChannel, AutoMLComputeConfig, AutoMLDataSplitConfig, AutoMLJobArtifacts, AutoMLJobChannel, AutoMLJobCompletionCriteria, AutoMLJobConfig, AutoMLJobObjective, AutoMLOutputDataConfig, AutoMLPartialFailureReason, AutoMLProblemTypeConfig, AutoMLResolvedAttributes, AutoMLSecurityConfig, AutoRollbackConfig, Autotune, BatchDataCaptureConfig, CfnCreateTemplateProvider, Channel, CheckpointConfig, ClusterAutoScalingConfigOutput, ClusterEventDetail, ClusterInstanceGroupDetails, ClusterNodeDetails, ClusterOrchestrator, ClusterRestrictedInstanceGroupDetails, ClusterRestrictedInstanceGroupsConfigOutput, ClusterTieredStorageConfig, CodeEditorAppImageConfig, CodeRepository, CognitoConfig, CognitoMemberDefinition, CollectionConfiguration, ComputeQuotaConfig, ComputeQuotaTarget, GitConfig, InferenceSpecification, JupyterLabAppImageConfig, KernelGatewayImageConfig, OutputDataConfig, ResourceConfig, ResourceSpec, StoppingCondition, Tag, TransformInput, TransformOutput, TransformResources, UserContext, VpcConfig } from "./models_0";
|
|
3
|
+
import type { ContextSource, DataCaptureConfig, DataQualityAppSpecification, DataQualityBaselineConfig, DataQualityJobInput, DefaultSpaceSettings, DeploymentConfig, DeviceSelectionConfig, DomainSettings, EdgeDeploymentConfig, EdgeDeploymentModelConfig, EdgeOutputConfig, ExplainerConfig, FeatureDefinition, FlowDefinitionOutputConfig, HubS3StorageConfig, HumanLoopActivationConfig, HumanLoopConfig, HumanLoopRequestSource, HyperParameterTrainingJobDefinition, HyperParameterTuningJobConfig, HyperParameterTuningJobWarmStartConfig, InputConfig, JupyterServerAppSettings, KernelGatewayAppSettings, MetadataProperties, MetricsConfig, ModelCompilationConfig, ModelDeployConfig, ModelQuantizationConfig, ModelShardingConfig, ModelSpeculativeDecodingConfig, MonitoringNetworkConfig, MonitoringOutputConfig, MonitoringResources, MonitoringStoppingCondition, NeoVpcConfig, NetworkConfig, OfflineStoreConfig, OnlineStoreConfig, OptimizationJobModelSource, OptimizationSageMakerModel, OutputConfig, ProductionVariant, ProductionVariantManagedInstanceScaling, ProductionVariantRoutingConfig, ProductionVariantServerlessConfig, RetryStrategy, SchedulerConfig, TrainingSpecification, UserSettings } from "./models_1";
|
|
4
|
+
/**
|
|
5
|
+
* <p>Settings for an optimization technique that you apply with a model optimization job.</p>
|
|
6
|
+
* @public
|
|
7
|
+
*/
|
|
8
|
+
export type OptimizationConfig = OptimizationConfig.ModelCompilationConfigMember | OptimizationConfig.ModelQuantizationConfigMember | OptimizationConfig.ModelShardingConfigMember | OptimizationConfig.ModelSpeculativeDecodingConfigMember | OptimizationConfig.$UnknownMember;
|
|
9
|
+
/**
|
|
10
|
+
* @public
|
|
11
|
+
*/
|
|
12
|
+
export declare namespace OptimizationConfig {
|
|
13
|
+
/**
|
|
14
|
+
* <p>Settings for the model quantization technique that's applied by a model optimization job.</p>
|
|
15
|
+
* @public
|
|
16
|
+
*/
|
|
17
|
+
interface ModelQuantizationConfigMember {
|
|
18
|
+
ModelQuantizationConfig: ModelQuantizationConfig;
|
|
19
|
+
ModelCompilationConfig?: never;
|
|
20
|
+
ModelShardingConfig?: never;
|
|
21
|
+
ModelSpeculativeDecodingConfig?: never;
|
|
22
|
+
$unknown?: never;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* <p>Settings for the model compilation technique that's applied by a model optimization job.</p>
|
|
26
|
+
* @public
|
|
27
|
+
*/
|
|
28
|
+
interface ModelCompilationConfigMember {
|
|
29
|
+
ModelQuantizationConfig?: never;
|
|
30
|
+
ModelCompilationConfig: ModelCompilationConfig;
|
|
31
|
+
ModelShardingConfig?: never;
|
|
32
|
+
ModelSpeculativeDecodingConfig?: never;
|
|
33
|
+
$unknown?: never;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* <p>Settings for the model sharding technique that's applied by a model optimization job.</p>
|
|
37
|
+
* @public
|
|
38
|
+
*/
|
|
39
|
+
interface ModelShardingConfigMember {
|
|
40
|
+
ModelQuantizationConfig?: never;
|
|
41
|
+
ModelCompilationConfig?: never;
|
|
42
|
+
ModelShardingConfig: ModelShardingConfig;
|
|
43
|
+
ModelSpeculativeDecodingConfig?: never;
|
|
44
|
+
$unknown?: never;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* <p>Settings for the model speculative decoding technique that's applied by a model optimization job.</p>
|
|
48
|
+
* @public
|
|
49
|
+
*/
|
|
50
|
+
interface ModelSpeculativeDecodingConfigMember {
|
|
51
|
+
ModelQuantizationConfig?: never;
|
|
52
|
+
ModelCompilationConfig?: never;
|
|
53
|
+
ModelShardingConfig?: never;
|
|
54
|
+
ModelSpeculativeDecodingConfig: ModelSpeculativeDecodingConfig;
|
|
55
|
+
$unknown?: never;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* @public
|
|
59
|
+
*/
|
|
60
|
+
interface $UnknownMember {
|
|
61
|
+
ModelQuantizationConfig?: never;
|
|
62
|
+
ModelCompilationConfig?: never;
|
|
63
|
+
ModelShardingConfig?: never;
|
|
64
|
+
ModelSpeculativeDecodingConfig?: never;
|
|
65
|
+
$unknown: [string, any];
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* @deprecated unused in schema-serde mode.
|
|
69
|
+
*
|
|
70
|
+
*/
|
|
71
|
+
interface Visitor<T> {
|
|
72
|
+
ModelQuantizationConfig: (value: ModelQuantizationConfig) => T;
|
|
73
|
+
ModelCompilationConfig: (value: ModelCompilationConfig) => T;
|
|
74
|
+
ModelShardingConfig: (value: ModelShardingConfig) => T;
|
|
75
|
+
ModelSpeculativeDecodingConfig: (value: ModelSpeculativeDecodingConfig) => T;
|
|
76
|
+
_: (name: string, value: any) => T;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* <p>Details for where to store the optimized model that you create with the optimization job.</p>
|
|
81
|
+
* @public
|
|
82
|
+
*/
|
|
83
|
+
export interface OptimizationJobOutputConfig {
|
|
84
|
+
/**
|
|
85
|
+
* <p>The Amazon Resource Name (ARN) of a key in Amazon Web Services KMS. SageMaker uses the key to encrypt the artifacts of the optimized model when SageMaker uploads the model to Amazon S3.</p>
|
|
86
|
+
* @public
|
|
87
|
+
*/
|
|
88
|
+
KmsKeyId?: string | undefined;
|
|
89
|
+
/**
|
|
90
|
+
* <p>The Amazon S3 URI for where to store the optimized model that you create with an optimization job.</p>
|
|
91
|
+
* @public
|
|
92
|
+
*/
|
|
93
|
+
S3OutputLocation: string | undefined;
|
|
94
|
+
/**
|
|
95
|
+
* <p>The name of a SageMaker model to use as the output destination for an optimization job.</p>
|
|
96
|
+
* @public
|
|
97
|
+
*/
|
|
98
|
+
SageMakerModel?: OptimizationSageMakerModel | undefined;
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* <p>A VPC in Amazon VPC that's accessible to an optimized model that you create with an optimization job. You can control access to and from your resources by configuring a VPC. For more information, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/infrastructure-give-access.html">Give SageMaker Access to Resources in your Amazon VPC</a>. </p>
|
|
102
|
+
* @public
|
|
103
|
+
*/
|
|
104
|
+
export interface OptimizationVpcConfig {
|
|
105
|
+
/**
|
|
106
|
+
* <p>The VPC security group IDs, in the form <code>sg-xxxxxxxx</code>. Specify the security groups for the VPC that is specified in the <code>Subnets</code> field.</p>
|
|
107
|
+
* @public
|
|
108
|
+
*/
|
|
109
|
+
SecurityGroupIds: string[] | undefined;
|
|
110
|
+
/**
|
|
111
|
+
* <p>The IDs of the subnets in the VPC to which you want to connect your optimized model.</p>
|
|
112
|
+
* @public
|
|
113
|
+
*/
|
|
114
|
+
Subnets: string[] | undefined;
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* @public
|
|
118
|
+
*/
|
|
119
|
+
export interface CreateOptimizationJobRequest {
|
|
120
|
+
/**
|
|
121
|
+
* <p>A custom name for the new optimization job.</p>
|
|
122
|
+
* @public
|
|
123
|
+
*/
|
|
124
|
+
OptimizationJobName: string | undefined;
|
|
125
|
+
/**
|
|
126
|
+
* <p>The Amazon Resource Name (ARN) of an IAM role that enables Amazon SageMaker AI to perform tasks on your behalf. </p> <p>During model optimization, Amazon SageMaker AI needs your permission to:</p> <ul> <li> <p>Read input data from an S3 bucket</p> </li> <li> <p>Write model artifacts to an S3 bucket</p> </li> <li> <p>Write logs to Amazon CloudWatch Logs</p> </li> <li> <p>Publish metrics to Amazon CloudWatch</p> </li> </ul> <p>You grant permissions for all of these tasks to an IAM role. To pass this role to Amazon SageMaker AI, the caller of this API must have the <code>iam:PassRole</code> permission. For more information, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html">Amazon SageMaker AI Roles.</a> </p>
|
|
127
|
+
* @public
|
|
128
|
+
*/
|
|
129
|
+
RoleArn: string | undefined;
|
|
130
|
+
/**
|
|
131
|
+
* <p>The location of the source model to optimize with an optimization job.</p>
|
|
132
|
+
* @public
|
|
133
|
+
*/
|
|
134
|
+
ModelSource: OptimizationJobModelSource | undefined;
|
|
135
|
+
/**
|
|
136
|
+
* <p>The type of instance that hosts the optimized model that you create with the optimization job.</p>
|
|
137
|
+
* @public
|
|
138
|
+
*/
|
|
139
|
+
DeploymentInstanceType: OptimizationJobDeploymentInstanceType | undefined;
|
|
140
|
+
/**
|
|
141
|
+
* <p>The maximum number of instances to use for the optimization job.</p>
|
|
142
|
+
* @public
|
|
143
|
+
*/
|
|
144
|
+
MaxInstanceCount?: number | undefined;
|
|
145
|
+
/**
|
|
146
|
+
* <p>The environment variables to set in the model container.</p>
|
|
147
|
+
* @public
|
|
148
|
+
*/
|
|
149
|
+
OptimizationEnvironment?: Record<string, string> | undefined;
|
|
150
|
+
/**
|
|
151
|
+
* <p>Settings for each of the optimization techniques that the job applies.</p>
|
|
152
|
+
* @public
|
|
153
|
+
*/
|
|
154
|
+
OptimizationConfigs: OptimizationConfig[] | undefined;
|
|
155
|
+
/**
|
|
156
|
+
* <p>Details for where to store the optimized model that you create with the optimization job.</p>
|
|
157
|
+
* @public
|
|
158
|
+
*/
|
|
159
|
+
OutputConfig: OptimizationJobOutputConfig | undefined;
|
|
160
|
+
/**
|
|
161
|
+
* <p>Specifies a limit to how long a job can run. When the job reaches the time limit, SageMaker ends the job. Use this API to cap costs.</p> <p>To stop a training job, SageMaker sends the algorithm the <code>SIGTERM</code> signal, which delays job termination for 120 seconds. Algorithms can use this 120-second window to save the model artifacts, so the results of training are not lost. </p> <p>The training algorithms provided by SageMaker automatically save the intermediate results of a model training job when possible. This attempt to save artifacts is only best effort, as the model might not be in a state from which it can be saved. For example, if training has just started, the model might not be ready to save. When saved, this intermediate data is a valid model artifact. You can use it to create a model with <code>CreateModel</code>.</p> <note> <p>The Neural Topic Model (NTM) currently does not support saving intermediate model artifacts. When training NTMs, make sure that the maximum runtime is sufficient for the training job to complete.</p> </note>
|
|
162
|
+
* @public
|
|
163
|
+
*/
|
|
164
|
+
StoppingCondition: StoppingCondition | undefined;
|
|
165
|
+
/**
|
|
166
|
+
* <p>A list of key-value pairs associated with the optimization job. For more information, see <a href="https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html">Tagging Amazon Web Services resources</a> in the <i>Amazon Web Services General Reference Guide</i>.</p>
|
|
167
|
+
* @public
|
|
168
|
+
*/
|
|
169
|
+
Tags?: Tag[] | undefined;
|
|
170
|
+
/**
|
|
171
|
+
* <p>A VPC in Amazon VPC that your optimized model has access to.</p>
|
|
172
|
+
* @public
|
|
173
|
+
*/
|
|
174
|
+
VpcConfig?: OptimizationVpcConfig | undefined;
|
|
175
|
+
}
|
|
4
176
|
/**
|
|
5
177
|
* @public
|
|
6
178
|
*/
|
|
@@ -4358,6 +4530,11 @@ export interface DescribeClusterResponse {
|
|
|
4358
4530
|
* @public
|
|
4359
4531
|
*/
|
|
4360
4532
|
RestrictedInstanceGroups?: ClusterRestrictedInstanceGroupDetails[] | undefined;
|
|
4533
|
+
/**
|
|
4534
|
+
* <p>The configuration for the restricted instance groups (RIG) in the SageMaker HyperPod cluster.</p>
|
|
4535
|
+
* @public
|
|
4536
|
+
*/
|
|
4537
|
+
RestrictedInstanceGroupsConfig?: ClusterRestrictedInstanceGroupsConfigOutput | undefined;
|
|
4361
4538
|
/**
|
|
4362
4539
|
* <p>Specifies an Amazon Virtual Private Cloud (VPC) that your SageMaker jobs, hosted models, and compute resources have access to. You can control access to and from your resources by configuring a VPC. For more information, see <a href="https://docs.aws.amazon.com/sagemaker/latest/dg/infrastructure-give-access.html">Give SageMaker Access to Resources in your Amazon VPC</a>. </p>
|
|
4363
4540
|
* @public
|
|
@@ -7044,102 +7221,3 @@ export interface InferenceComponentPlacementStatus {
|
|
|
7044
7221
|
*/
|
|
7045
7222
|
CurrentCopyCount: number | undefined;
|
|
7046
7223
|
}
|
|
7047
|
-
/**
|
|
7048
|
-
* <p>Details about the runtime settings for the model that is deployed with the inference component.</p>
|
|
7049
|
-
* @public
|
|
7050
|
-
*/
|
|
7051
|
-
export interface InferenceComponentRuntimeConfigSummary {
|
|
7052
|
-
/**
|
|
7053
|
-
* <p>The number of runtime copies of the model container that you requested to deploy with the inference component.</p>
|
|
7054
|
-
* @public
|
|
7055
|
-
*/
|
|
7056
|
-
DesiredCopyCount?: number | undefined;
|
|
7057
|
-
/**
|
|
7058
|
-
* <p>The number of runtime copies of the model container that are currently deployed.</p>
|
|
7059
|
-
* @public
|
|
7060
|
-
*/
|
|
7061
|
-
CurrentCopyCount?: number | undefined;
|
|
7062
|
-
/**
|
|
7063
|
-
* <p>The placement status of the inference component across instance types. Shows how the inference component copies are distributed across instance types.</p>
|
|
7064
|
-
* @public
|
|
7065
|
-
*/
|
|
7066
|
-
PlacementStatus?: InferenceComponentPlacementStatus[] | undefined;
|
|
7067
|
-
}
|
|
7068
|
-
/**
|
|
7069
|
-
* <p>Details about the resources that are deployed with this inference component.</p>
|
|
7070
|
-
* @public
|
|
7071
|
-
*/
|
|
7072
|
-
export interface InferenceComponentContainerSpecificationSummary {
|
|
7073
|
-
/**
|
|
7074
|
-
* <p>Gets the Amazon EC2 Container Registry path of the docker image of the model that is hosted in this <a href="https://docs.aws.amazon.com/sagemaker/latest/APIReference/API_ProductionVariant.html">ProductionVariant</a>.</p> <p>If you used the <code>registry/repository[:tag]</code> form to specify the image path of the primary container when you created the model hosted in this <code>ProductionVariant</code>, the path resolves to a path of the form <code>registry/repository[@digest]</code>. A digest is a hash value that identifies a specific version of an image. For information about Amazon ECR paths, see <a href="https://docs.aws.amazon.com/AmazonECR/latest/userguide/docker-pull-ecr-image.html">Pulling an Image</a> in the <i>Amazon ECR User Guide</i>.</p>
|
|
7075
|
-
* @public
|
|
7076
|
-
*/
|
|
7077
|
-
DeployedImage?: DeployedImage | undefined;
|
|
7078
|
-
/**
|
|
7079
|
-
* <p>The Amazon S3 path where the model artifacts are stored.</p>
|
|
7080
|
-
* @public
|
|
7081
|
-
*/
|
|
7082
|
-
ArtifactUrl?: string | undefined;
|
|
7083
|
-
/**
|
|
7084
|
-
* <p>The environment variables to set in the Docker container.</p>
|
|
7085
|
-
* @public
|
|
7086
|
-
*/
|
|
7087
|
-
Environment?: Record<string, string> | undefined;
|
|
7088
|
-
}
|
|
7089
|
-
/**
|
|
7090
|
-
* <p>Settings that affect how the inference component caches data.</p>
|
|
7091
|
-
* @public
|
|
7092
|
-
*/
|
|
7093
|
-
export interface InferenceComponentDataCacheConfigSummary {
|
|
7094
|
-
/**
|
|
7095
|
-
* <p>Indicates whether the inference component caches model artifacts as part of the auto scaling process.</p>
|
|
7096
|
-
* @public
|
|
7097
|
-
*/
|
|
7098
|
-
EnableCaching: boolean | undefined;
|
|
7099
|
-
}
|
|
7100
|
-
/**
|
|
7101
|
-
* <p>Details about the resources that are deployed with this inference component.</p>
|
|
7102
|
-
* @public
|
|
7103
|
-
*/
|
|
7104
|
-
export interface InferenceComponentSpecificationSummary {
|
|
7105
|
-
/**
|
|
7106
|
-
* <p>The ML compute instance type associated with this inference component specification.</p>
|
|
7107
|
-
* @public
|
|
7108
|
-
*/
|
|
7109
|
-
InstanceType?: ProductionVariantInstanceType | undefined;
|
|
7110
|
-
/**
|
|
7111
|
-
* <p>The name of the SageMaker AI model object that is deployed with the inference component.</p>
|
|
7112
|
-
* @public
|
|
7113
|
-
*/
|
|
7114
|
-
ModelName?: string | undefined;
|
|
7115
|
-
/**
|
|
7116
|
-
* <p>Details about the container that provides the runtime environment for the model that is deployed with the inference component.</p>
|
|
7117
|
-
* @public
|
|
7118
|
-
*/
|
|
7119
|
-
Container?: InferenceComponentContainerSpecificationSummary | undefined;
|
|
7120
|
-
/**
|
|
7121
|
-
* <p>Settings that take effect while the model container starts up.</p>
|
|
7122
|
-
* @public
|
|
7123
|
-
*/
|
|
7124
|
-
StartupParameters?: InferenceComponentStartupParameters | undefined;
|
|
7125
|
-
/**
|
|
7126
|
-
* <p>The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component.</p>
|
|
7127
|
-
* @public
|
|
7128
|
-
*/
|
|
7129
|
-
ComputeResourceRequirements?: InferenceComponentComputeResourceRequirements | undefined;
|
|
7130
|
-
/**
|
|
7131
|
-
* <p>The name of the base inference component that contains this inference component.</p>
|
|
7132
|
-
* @public
|
|
7133
|
-
*/
|
|
7134
|
-
BaseInferenceComponentName?: string | undefined;
|
|
7135
|
-
/**
|
|
7136
|
-
* <p>Settings that affect how the inference component caches data.</p>
|
|
7137
|
-
* @public
|
|
7138
|
-
*/
|
|
7139
|
-
DataCacheConfig?: InferenceComponentDataCacheConfigSummary | undefined;
|
|
7140
|
-
/**
|
|
7141
|
-
* <p>The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed.</p>
|
|
7142
|
-
* @public
|
|
7143
|
-
*/
|
|
7144
|
-
SchedulingConfig?: InferenceComponentSchedulingConfig | undefined;
|
|
7145
|
-
}
|