@aws/ml-container-creator 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/config/parameter-schema-v2.json +2065 -0
- package/package.json +6 -3
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/models.json +182 -26
- package/src/app.js +6 -389
- package/src/lib/bootstrap-command-handler.js +75 -1078
- package/src/lib/bootstrap-profile-manager.js +634 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/config-loader.js +405 -0
- package/src/lib/config-manager.js +59 -1668
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +140 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +471 -0
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +70 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +398 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/config/parameter-schema.json +0 -88
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Feature prompt definitions.
|
|
6
|
+
* Covers: module prompts (sample model, test types), LoRA, benchmark.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Triton backends for which the sample-model prompt is suppressed
// (the LLM-serving backends, which don't support the sample model).
const TRITON_BACKENDS_WITHOUT_SAMPLE_MODEL = ['vllm', 'tensorrtllm', 'pytorch'];

// Triton backends that are limited to hosted-endpoint tests.
const TRITON_HOSTED_ONLY_BACKENDS = ['vllm', 'tensorrtllm'];

/**
 * Derives the architecture and backend from the answers collected so far.
 *
 * Explicit `architecture` / `backend` answers win. Otherwise both are parsed
 * from `deploymentConfig`, read as `<architecture>-<backend>`: everything
 * before the first dash is the architecture, and the remainder (which may
 * itself contain dashes) is the backend.
 *
 * @param {object} [answers] - Answers hash passed to the prompt callbacks.
 * @returns {{architecture: (string|undefined), backend: (string|undefined)}}
 */
const resolveDeploymentTarget = (answers) => {
    const { architecture, backend, deploymentConfig } = answers ?? {};
    // Only split real strings so a missing or malformed deploymentConfig
    // yields undefined parts instead of throwing.
    const configParts = typeof deploymentConfig === 'string' ? deploymentConfig.split('-') : null;

    return {
        architecture: architecture || configParts?.[0],
        backend: backend || configParts?.slice(1).join('-')
    };
};

/**
 * Returns the test types applicable to the selected architecture/backend.
 *
 * Used for both the `choices` and the `default` of the `testTypes` prompt so
 * the two can never disagree. A fresh array is returned on every call.
 *
 * @param {object} [answers] - Answers hash passed to the prompt callbacks.
 * @returns {string[]} Test type identifiers.
 */
const resolveTestTypes = (answers) => {
    const { architecture, backend } = resolveDeploymentTarget(answers);

    // Transformers and diffusors only offer hosted endpoint tests plus benchmarking.
    if (architecture === 'transformers' || architecture === 'diffusors') {
        return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
    }

    // Triton LLM backends only offer hosted endpoint tests.
    if (architecture === 'triton' && TRITON_HOSTED_ONLY_BACKENDS.includes(backend)) {
        return ['hosted-model-endpoint'];
    }

    return ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'];
};

/**
 * Module prompts: whether to include the sample model, and which test types
 * to generate.
 */
const modulePrompts = [
    {
        type: 'confirm',
        name: 'includeSampleModel',
        message: 'Include sample Abalone classifier?',
        default: true,
        when: (answers) => {
            const { architecture, backend } = resolveDeploymentTarget(answers);

            // Never for transformers, and never for diffusors
            // (diffusion models cannot be trained inline).
            if (architecture === 'transformers' || architecture === 'diffusors') {
                return false;
            }

            // For Triton, only the non-LLM backends support the sample model.
            if (architecture === 'triton') {
                return !TRITON_BACKENDS_WITHOUT_SAMPLE_MODEL.includes(backend);
            }

            // Every other architecture (e.g. http) always shows the prompt.
            return true;
        }
    },
    {
        type: 'checkbox',
        name: 'testTypes',
        message: 'Test type?',
        // Every applicable test type is offered and pre-selected.
        choices: (answers) => resolveTestTypes(answers),
        default: (answers) => resolveTestTypes(answers)
    }
];
|
|
82
|
+
|
|
83
|
+
// Backend identifiers for which LoRA adapter serving is offered.
// 'lmi' and 'djl' are accepted alongside 'djl-lmi'.
const LORA_CAPABLE_SERVERS = ['vllm', 'sglang', 'djl-lmi', 'lmi', 'djl'];

/**
 * Validates that a prompt answer is a whole number greater than zero.
 * Numeric strings are coerced first so values that arrive as text
 * are judged on their numeric value.
 *
 * @param {*} value - Answer supplied by the prompt.
 * @returns {true|string} `true` when valid, otherwise the error message to display.
 */
const validatePositiveInteger = (value) => {
    const candidate = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
    return (Number.isInteger(candidate) && candidate > 0) || 'Enter a whole number greater than zero.';
};

/**
 * LoRA adapter prompts for multi-adapter serving configuration.
 * `enableLora` is only asked when the architecture is `transformers` AND the
 * backend is one of vllm, sglang, djl-lmi, lmi or djl. The numeric follow-up
 * prompts are only asked once `enableLora` has been answered `true`, and they
 * reject anything that is not a positive integer.
 * Requirements: 1.1, 1.2, 1.4
 */
const loraPrompts = [
    {
        type: 'confirm',
        name: 'enableLora',
        message: 'Enable LoRA adapter serving?',
        default: false,
        when: (answers) => {
            // Explicit answers win; otherwise parse "<architecture>-<backend>"
            // out of deploymentConfig (the backend part may contain dashes).
            const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
            const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');

            return architecture === 'transformers' && LORA_CAPABLE_SERVERS.includes(backend);
        }
    },
    {
        type: 'number',
        name: 'maxLoras',
        message: 'Maximum concurrent LoRA adapters in GPU memory:',
        default: 30,
        // An adapter count of zero, a negative or a fraction is meaningless.
        validate: validatePositiveInteger,
        when: (answers) => answers.enableLora === true
    },
    {
        type: 'number',
        name: 'maxLoraRank',
        message: 'Maximum LoRA rank:',
        default: 64,
        // A rank is a positive whole number.
        validate: validatePositiveInteger,
        when: (answers) => answers.enableLora === true
    }
];
|
|
117
|
+
|
|
118
|
+
/**
 * Benchmark prompts for SageMaker AI Benchmarking (NVIDIA AIPerf).
 *
 * Every prompt in this list is gated on `answers.includeBenchmark === true`.
 * NOTE(review): `includeBenchmark` is not set in this module; presumably the
 * caller derives it from 'sagemaker-ai-automated-benchmarking' being selected
 * in testTypes - confirm against the prompt runner.
 * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
 */
const benchmarkPrompts = [
    // [type, name, message, default]
    ['number', 'benchmarkConcurrency', 'Concurrent requests for benchmark:', 10],
    ['number', 'benchmarkInputTokensMean', 'Mean input tokens per request:', 550],
    ['number', 'benchmarkOutputTokensMean', 'Mean output tokens per request:', 150],
    ['confirm', 'benchmarkStreaming', 'Enable streaming for benchmark?', true],
    // The two free-text prompts default to an empty string.
    ['input', 'benchmarkRequestCount', 'Total request count (leave empty for service default):', ''],
    ['input', 'benchmarkS3OutputPath', 'Benchmark results S3 path (leave empty for auto-created bucket):', '']
].map(([type, name, message, initialValue]) => ({
    type,
    name,
    message,
    default: initialValue,
    // Strict comparison: only a literal boolean true enables the sub-prompts.
    when: (answers) => answers.includeBenchmark === true
}));
|
|
167
|
+
|
|
168
|
+
export {
|
|
169
|
+
modulePrompts,
|
|
170
|
+
loraPrompts,
|
|
171
|
+
benchmarkPrompts
|
|
172
|
+
};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Barrel file — re-exports all prompt definitions from phase-based modules.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
export {
|
|
9
|
+
deploymentConfigPrompts,
|
|
10
|
+
frameworkPrompts,
|
|
11
|
+
enginePrompts,
|
|
12
|
+
frameworkVersionPrompts,
|
|
13
|
+
frameworkProfilePrompts,
|
|
14
|
+
modelFormatPrompts,
|
|
15
|
+
modelServerPrompts,
|
|
16
|
+
modelLoadStrategyPrompts,
|
|
17
|
+
modelProfilePrompts,
|
|
18
|
+
hfTokenPrompts,
|
|
19
|
+
ngcApiKeyPrompts
|
|
20
|
+
} from './model-prompts.js';
|
|
21
|
+
|
|
22
|
+
export {
|
|
23
|
+
modulePrompts,
|
|
24
|
+
loraPrompts,
|
|
25
|
+
benchmarkPrompts
|
|
26
|
+
} from './feature-prompts.js';
|
|
27
|
+
|
|
28
|
+
export {
|
|
29
|
+
infrastructurePrompts,
|
|
30
|
+
infraRegionAndTargetPrompts,
|
|
31
|
+
infraExistingEndpointPrompts,
|
|
32
|
+
infraInstancePrompts,
|
|
33
|
+
infraAsyncPrompts,
|
|
34
|
+
infraBatchTransformPrompts,
|
|
35
|
+
infraHyperPodPrompts,
|
|
36
|
+
infraBuildPrompts,
|
|
37
|
+
baseImageSearchPrompts,
|
|
38
|
+
baseImagePrompts,
|
|
39
|
+
formatImageChoices,
|
|
40
|
+
filterByCudaGeneration,
|
|
41
|
+
getInstanceCudaGeneration,
|
|
42
|
+
instanceCatalogRaw
|
|
43
|
+
} from './infrastructure-prompts.js';
|
|
44
|
+
|
|
45
|
+
export {
|
|
46
|
+
projectPrompts,
|
|
47
|
+
destinationPrompts
|
|
48
|
+
} from './project-prompts.js';
|