@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Model & Framework prompt definitions.
|
|
6
|
+
* Covers: deployment config, framework, engine, version, profile, format,
|
|
7
|
+
* model server, model load strategy, model profile, HF token, NGC API key.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Phase 1: Core ML configuration (moved to first)
|
|
12
|
+
* Flattened deployment configuration combining architecture + backend
|
|
13
|
+
* Requirements: 3.1, 3.2, 16.1, 16.2, 16.3, 16.4, 16.8, 16.9
|
|
14
|
+
*/
|
|
15
|
+
const deploymentConfigPrompts = [
|
|
16
|
+
{
|
|
17
|
+
type: 'list',
|
|
18
|
+
name: 'deploymentConfig',
|
|
19
|
+
message: 'Select deployment configuration:',
|
|
20
|
+
choices: [
|
|
21
|
+
{ type: 'separator', separator: 'āā Large Language Models āā' },
|
|
22
|
+
{
|
|
23
|
+
name: 'Transformers with vLLM',
|
|
24
|
+
value: 'transformers-vllm',
|
|
25
|
+
short: 'transformers-vllm'
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: 'Transformers with SGLang',
|
|
29
|
+
value: 'transformers-sglang',
|
|
30
|
+
short: 'transformers-sglang'
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
name: 'Transformers with TensorRT-LLM',
|
|
34
|
+
value: 'transformers-tensorrt-llm',
|
|
35
|
+
short: 'transformers-tensorrt-llm'
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
name: 'Transformers with LMI (Large Model Inference)',
|
|
39
|
+
value: 'transformers-lmi',
|
|
40
|
+
short: 'transformers-lmi'
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
name: 'Transformers with DJL (Deep Java Library)',
|
|
44
|
+
value: 'transformers-djl',
|
|
45
|
+
short: 'transformers-djl'
|
|
46
|
+
},
|
|
47
|
+
{ type: 'separator', separator: 'āā HTTP Serving āā' },
|
|
48
|
+
{
|
|
49
|
+
name: 'HTTP with Flask',
|
|
50
|
+
value: 'http-flask',
|
|
51
|
+
short: 'http-flask'
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
name: 'HTTP with FastAPI',
|
|
55
|
+
value: 'http-fastapi',
|
|
56
|
+
short: 'http-fastapi'
|
|
57
|
+
},
|
|
58
|
+
{ type: 'separator', separator: 'āā NVIDIA Triton Inference Server āā' },
|
|
59
|
+
{
|
|
60
|
+
name: 'Triton FIL (XGBoost, LightGBM)',
|
|
61
|
+
value: 'triton-fil',
|
|
62
|
+
short: 'triton-fil'
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
name: 'Triton ONNX Runtime',
|
|
66
|
+
value: 'triton-onnxruntime',
|
|
67
|
+
short: 'triton-onnxruntime'
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
name: 'Triton TensorFlow',
|
|
71
|
+
value: 'triton-tensorflow',
|
|
72
|
+
short: 'triton-tensorflow'
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
name: 'Triton PyTorch',
|
|
76
|
+
value: 'triton-pytorch',
|
|
77
|
+
short: 'triton-pytorch'
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
name: 'Triton vLLM',
|
|
81
|
+
value: 'triton-vllm',
|
|
82
|
+
short: 'triton-vllm'
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
name: 'Triton TensorRT-LLM',
|
|
86
|
+
value: 'triton-tensorrtllm',
|
|
87
|
+
short: 'triton-tensorrtllm'
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
name: 'Triton Python Backend',
|
|
91
|
+
value: 'triton-python',
|
|
92
|
+
short: 'triton-python'
|
|
93
|
+
},
|
|
94
|
+
{ type: 'separator', separator: 'āā Diffusion Models āā' },
|
|
95
|
+
{
|
|
96
|
+
name: 'Diffusors with vLLM Omni',
|
|
97
|
+
value: 'diffusors-vllm-omni',
|
|
98
|
+
short: 'diffusors-vllm-omni'
|
|
99
|
+
},
|
|
100
|
+
{ type: 'separator', separator: 'āā AWS Marketplace āā' },
|
|
101
|
+
{
|
|
102
|
+
name: 'Marketplace Model Package',
|
|
103
|
+
value: 'marketplace',
|
|
104
|
+
short: 'marketplace'
|
|
105
|
+
}
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
];
|
|
109
|
+
|
|
110
|
+
// Keep legacy frameworkPrompts for backward compatibility (deprecated)
|
|
111
|
+
const frameworkPrompts = deploymentConfigPrompts;
|
|
112
|
+
|
|
113
|
+
/**
 * ML engine selection, asked only for the `http` architecture.
 *
 * The architecture is taken from `answers.architecture` when set, otherwise
 * from the segment of `answers.deploymentConfig` before the first '-'.
 *
 * Requirements: 3.7
 */
const enginePrompts = [
    {
        type: 'list',
        name: 'engine',
        message: 'Select ML engine:',
        choices: [
            ['scikit-learn', 'sklearn'],
            ['XGBoost', 'xgboost'],
            ['TensorFlow', 'tensorflow']
        ].map(([name, value]) => ({ name, value })),
        when: ({ architecture, deploymentConfig }) =>
            (architecture || deploymentConfig?.split('-')[0]) === 'http'
    }
];
|
|
133
|
+
|
|
134
|
+
/**
 * Framework version selection prompts (for registry system)
 * Requirements: 2.1, 2.6, 8.2, 8.3
 *
 * The selectable versions are read from `answers._frameworkVersionChoices`
 * (expected to be populated by PromptRunner with registry data); the prompt
 * is skipped when that list is missing or empty.
 */
const frameworkVersionPrompts = [
    {
        type: 'list',
        name: 'frameworkVersion',
        /**
         * Build the question text.
         * @param {object} answers - Answers collected so far.
         * @returns {string} Question naming the framework being versioned.
         */
        message: (answers) => {
            // `framework` is not always set: other prompts derive it from the
            // first segment of deploymentConfig, so do the same here instead
            // of printing "undefined" in the question.
            const framework = answers.framework
                || answers.deploymentConfig?.split('-')[0]
                || 'the framework';
            return `Which version of ${framework} are you using?`;
        },
        /**
         * @param {object} answers - Answers collected so far.
         * @returns {Array} Version choices, or an empty list when none were supplied.
         */
        choices: (answers) => answers._frameworkVersionChoices || [],
        /**
         * @param {object} answers - Answers collected so far.
         * @returns {boolean} True only when at least one version choice is available.
         */
        when: (answers) => (answers._frameworkVersionChoices?.length ?? 0) > 0
    }
];
|
|
153
|
+
|
|
154
|
+
/**
 * Framework configuration profile prompts (for registry system).
 *
 * Profiles are read from `answers._frameworkProfileChoices`; per the
 * original note they are populated by PromptRunner with registry data.
 * The question is skipped when that list is absent or empty.
 *
 * Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.10
 */
const frameworkProfilePrompts = [
    {
        type: 'list',
        name: 'frameworkProfile',
        message: 'Select a framework configuration profile:',
        // Offer whatever profiles were supplied, or nothing at all.
        choices: ({ _frameworkProfileChoices: profiles }) => profiles || [],
        // Ask only when there is at least one profile to pick from.
        when: ({ _frameworkProfileChoices: profiles }) => profiles && profiles.length > 0
    }
];
|
|
173
|
+
|
|
174
|
+
/**
 * Model format and model selection prompts.
 *
 * Contains three questions, in order:
 *   1. `modelFormat`     - serialization format (http, triton fil/python, legacy).
 *   2. `modelName`       - model ID for transformers, diffusors and Triton LLM backends.
 *   3. `customModelName` - free-text model path when the custom entry was picked.
 *
 * Architecture and backend come from `answers.architecture` / `answers.backend`
 * when present, otherwise from `answers.deploymentConfig` split on '-'.
 */
const modelFormatPrompts = (() => {
    // Sentinel list entry that routes the user to the free-text question.
    const CUSTOM_ENTRY = 'Custom (enter manually)';

    // Segment of deploymentConfig before the first '-' unless set explicitly.
    const architectureOf = (answers) =>
        answers.architecture || answers.deploymentConfig?.split('-')[0];

    // Everything after the first '-' of deploymentConfig unless set explicitly.
    const backendOf = (answers) =>
        answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');

    // Serialization formats for an engine / legacy framework key ([] if unknown).
    const formatsFor = (key) => {
        const byEngine = {
            'xgboost': ['json', 'model', 'ubj'],
            'sklearn': ['pkl', 'joblib'],
            'tensorflow': ['keras', 'h5', 'SavedModel']
        };
        return byEngine[key] || [];
    };

    // True for the configurations that select a model by ID.
    const picksModelById = (arch, backendName) =>
        arch === 'transformers'
        || arch === 'diffusors'
        || (arch === 'triton' && (backendName === 'vllm' || backendName === 'tensorrtllm'));

    // Non-selectable heading for a model family.
    const familyHeading = (label) => ({ type: 'separator', separator: `āā ${label} āā` });

    // Hardcoded LLM fallbacks, grouped by family, used when no MCP choices exist.
    const llmFamilies = [
        ['Meta Llama', [
            'meta-llama/Llama-3.2-1B-Instruct',
            'meta-llama/Llama-3.2-3B-Instruct',
            'meta-llama/Llama-3.1-8B-Instruct',
            'meta-llama/Llama-3.3-70B-Instruct'
        ]],
        ['Qwen (Alibaba)', [
            'Qwen/Qwen3-0.6B',
            'Qwen/Qwen3-1.7B',
            'Qwen/Qwen3-4B',
            'Qwen/Qwen3-8B',
            'Qwen/Qwen3-14B',
            'Qwen/Qwen3-32B',
            'Qwen/Qwen2.5-7B-Instruct',
            'Qwen/Qwen2.5-14B-Instruct',
            'Qwen/Qwen2.5-32B-Instruct',
            'Qwen/Qwen2.5-72B-Instruct'
        ]],
        ['DeepSeek', [
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
            'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
            'deepseek-ai/DeepSeek-R1-Distill-Llama-70B'
        ]],
        ['OpenAI', [
            'openai/gpt-oss-20b',
            'openai/gpt-oss-120b'
        ]]
    ];

    const modelFormatQuestion = {
        type: 'list',
        name: 'modelFormat',
        message: 'In which format is your model serialized?',
        choices: (answers) => {
            const arch = architectureOf(answers);
            const backendName = backendOf(answers);

            switch (arch) {
                case 'http':
                    // The chosen engine decides which formats are offered.
                    return formatsFor(answers.engine);
                case 'triton':
                    // Only FIL and Python backends offer a choice; the other
                    // Triton backends have auto-set formats (see `when`).
                    if (backendName === 'fil') {
                        return ['xgboost_json', 'xgboost_ubj', 'lightgbm_txt'];
                    }
                    if (backendName === 'python') {
                        return ['pkl', 'joblib', 'custom'];
                    }
                    return [];
                default:
                    // Legacy configs: key by framework, falling back to architecture.
                    return formatsFor(answers.framework || arch);
            }
        },
        when: (answers) => {
            const arch = architectureOf(answers);
            const backendName = backendOf(answers);

            switch (arch) {
                case 'transformers':
                case 'diffusors':
                    // These pull models from the HF Hub; no format question.
                    return false;
                case 'http':
                    return true;
                case 'triton':
                    return backendName === 'fil' || backendName === 'python';
                default:
                    // Legacy support.
                    return (answers.framework || arch) !== 'transformers';
            }
        }
    };

    const modelNameQuestion = {
        type: 'list',
        name: 'modelName',
        message: 'Which model do you want to use?',
        choices: (answers) => {
            // Prefer MCP model-picker results when any were supplied.
            const suggested = answers._mcpModelChoices;
            if (suggested && suggested.length > 0) {
                return [...suggested, CUSTOM_ENTRY];
            }

            if (architectureOf(answers) === 'diffusors') {
                return [
                    'stabilityai/stable-diffusion-3.5-medium',
                    'black-forest-labs/FLUX.1-schnell',
                    'black-forest-labs/FLUX.1-dev',
                    CUSTOM_ENTRY
                ];
            }

            return [
                ...llmFamilies.flatMap(([label, ids]) => [familyHeading(label), ...ids]),
                { type: 'separator', separator: 'āāāāāāāāāāāāāā' },
                CUSTOM_ENTRY
            ];
        },
        default: (answers) => {
            const suggested = answers._mcpModelChoices;
            if (suggested && suggested.length > 0) {
                return suggested[0];
            }
            return architectureOf(answers) === 'diffusors'
                ? 'stabilityai/stable-diffusion-3.5-medium'
                : 'meta-llama/Llama-3.1-8B-Instruct';
        },
        when: (answers) => picksModelById(architectureOf(answers), backendOf(answers))
    };

    const customModelNameQuestion = {
        type: 'input',
        name: 'customModelName',
        message: 'Enter the model path:',
        validate: (input) => {
            const isBlank = !input || input.trim() === '';
            if (isBlank) {
                return 'Model name is required';
            }
            // Basic check only: a path must contain a slash (org/model, s3://path, etc.).
            return input.includes('/')
                ? true
                : 'Please use the full model path (e.g., microsoft/DialoGPT-medium, s3://bucket/model, registry://my-package)';
        },
        // Same configurations as modelName, but only after the custom entry was chosen.
        when: (answers) =>
            picksModelById(architectureOf(answers), backendOf(answers))
            && answers.modelName === CUSTOM_ENTRY
    };

    return [modelFormatQuestion, modelNameQuestion, customModelNameQuestion];
})();

// Deprecated: modelServer is now derived from deploymentConfig, so nothing is asked here.
const modelServerPrompts = [];
|
|
374
|
+
|
|
375
|
+
/**
 * Model loading strategy prompt.
 *
 * Lets the user choose between baking the model into the image at build time
 * and downloading it when the container starts. Shown only for the
 * `transformers` and `diffusors` architectures; defaults to `runtime`.
 *
 * Requirements: 13.1, 13.2, 13.3, 13.4, 13.5
 */
const modelLoadStrategyPrompts = [
    {
        type: 'list',
        name: 'modelLoadStrategy',
        // Three-line message: the question followed by one hint per strategy.
        message: [
            'How should the model be loaded?',
            ' Build-time: Bakes model into image (larger image, faster startup)',
            ' Runtime: Downloads at container startup (smaller image, slower startup)'
        ].join('\n'),
        choices: [
            { name: 'Runtime (download at startup)', value: 'runtime' },
            { name: 'Build-time (bake into image) [EXPERIMENTAL]', value: 'build-time' }
        ],
        default: 'runtime',
        when: ({ architecture, deploymentConfig }) => {
            const arch = architecture || deploymentConfig?.split('-')[0];
            return ['transformers', 'diffusors'].includes(arch);
        }
    }
];
|
|
398
|
+
|
|
399
|
+
/**
 * Model configuration profile prompts (for registry system).
 *
 * Profiles are read from `answers._modelProfileChoices`; per the original
 * note they are populated by PromptRunner with registry data. The question
 * is skipped when that list is absent or empty.
 *
 * Requirements: 12.1, 12.2, 12.3, 12.4, 12.5, 12.10
 */
const modelProfilePrompts = [
    {
        type: 'list',
        name: 'modelProfile',
        message: 'Select a model configuration profile:',
        // Offer whatever profiles were supplied, or nothing at all.
        choices: ({ _modelProfileChoices: profiles }) => profiles || [],
        // Ask only when there is at least one profile to pick from.
        when: ({ _modelProfileChoices: profiles }) => profiles && profiles.length > 0
    }
];
|
|
418
|
+
|
|
419
|
+
/**
|
|
420
|
+
* Reference list of example model IDs, originally described as models that
* don't require HF_TOKEN prompts.
|
|
421
|
+
* NOTE(review): the original comment called these public models that need no
* authentication, but the hfToken prompt in this file never consults this
* list and its own notice names Llama as gated - confirm before relying on it.
|
|
422
|
+
*/
|
|
423
|
+
// eslint-disable-next-line no-unused-vars -- reference list for future use
|
|
424
|
+
const EXAMPLE_MODEL_IDS = [
|
|
425
|
+
'meta-llama/Llama-3.1-8B-Instruct',
|
|
426
|
+
'meta-llama/Llama-3.2-3B-Instruct',
|
|
427
|
+
'Qwen/Qwen3-8B',
|
|
428
|
+
'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
|
|
429
|
+
'openai/gpt-oss-20b'
|
|
430
|
+
];
|
|
431
|
+
|
|
432
|
+
/**
 * HuggingFace token prompt.
 *
 * Asked for the transformers and diffusors architectures and for the Triton
 * LLM backends (vllm, tensorrtllm), unless `answers.modelSource` names a
 * source other than 'huggingface'. The answer may be a literal token, the
 * string "$HF_TOKEN" (environment variable reference), or empty.
 */
const hfTokenPrompts = [
    {
        type: 'input',
        name: 'hfToken',
        message: 'HuggingFace token (enter token, "$HF_TOKEN" for env var, or leave empty):',
        /**
         * Decide whether to ask for a token. Prints a security notice to
         * stdout as a side effect when the prompt is going to be shown.
         * @param {object} answers - Answers collected so far.
         * @returns {boolean} True when the token prompt should be displayed.
         */
        when: (answers) => {
            const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
            const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');

            const isTransformers = architecture === 'transformers';
            // Diffusors also pull models from the HuggingFace Hub.
            const isDiffusors = architecture === 'diffusors';
            // Triton LLM backends (vllm, tensorrtllm). Requirements: 9.1, 9.2
            const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');

            if (!isTransformers && !isDiffusors && !isTritonLlm) {
                return false;
            }

            // S3 / registry model sources don't need HF auth; an unset source
            // is treated as HuggingFace.
            const modelSource = answers.modelSource;
            if (modelSource && modelSource !== 'huggingface') {
                return false;
            }

            // Display security warning before prompting.
            console.log('\nš HuggingFace Authentication');
            console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
            console.log('ā ļø Security Note: The token will be baked into the Docker image.');
            console.log(' Anyone with access to the image can extract the token using \'docker inspect\'.');
            console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.');
            console.log(' This keeps the token out of the image and allows rotation without rebuilding.\n');

            return true;
        },
        /**
         * Non-blocking validation: every input is accepted. A warning is
         * printed when a literal token does not start with "hf_".
         * @param {string} input - Raw text entered by the user.
         * @returns {boolean} Always true.
         */
        validate: (input) => {
            // Trim once so the empty, "$HF_TOKEN" and prefix checks all see
            // the same value; previously the prefix check used the untrimmed
            // input and warned on valid tokens with surrounding whitespace.
            const token = (input || '').trim();

            // Empty is valid (not all models require auth), and so is the
            // environment variable reference.
            if (token === '' || token === '$HF_TOKEN') {
                return true;
            }

            // Direct token should start with hf_ (warning only, not blocking).
            if (!token.startsWith('hf_')) {
                console.warn('\nā ļø Warning: HuggingFace tokens typically start with "hf_"');
                console.warn(' If this is intentional, you can ignore this warning.');
            }

            return true;
        }
    }
];
|
|
493
|
+
|
|
494
|
+
/**
 * NVIDIA NGC API key prompt.
 *
 * Asked only for the `transformers` architecture with the `tensorrt-llm`
 * backend. Triton configs (public images, Requirements: 9.2) and diffusors
 * configs (public Docker Hub images) are never asked, and neither is any
 * other combination.
 */
const ngcApiKeyPrompts = [
    {
        type: 'input',
        name: 'ngcApiKey',
        message: 'NVIDIA NGC API key (enter key, "$NGC_API_KEY" for env var, or leave empty):',
        when: (answers) => {
            const arch = answers.architecture || answers.deploymentConfig?.split('-')[0];
            const backendName = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');

            const needsNgcKey = arch === 'transformers' && backendName === 'tensorrt-llm';
            if (!needsNgcKey) {
                return false;
            }

            // Explain where the key comes from before the prompt is shown.
            [
                '\nš NVIDIA NGC Authentication',
                ' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.',
                ' 1. Create account at: https://ngc.nvidia.com/',
                ' 2. Generate API key in account settings',
                ' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n'
            ].forEach((line) => console.log(line));

            return true;
        },
        validate: (input) => {
            // Every form is accepted: blank, the "$NGC_API_KEY" reference, or a literal key.
            const key = input ? input.trim() : '';
            const isBlank = key === '';
            const isEnvReference = key === '$NGC_API_KEY';
            return isBlank || isEnvReference || key.length > 0;
        }
    }
];
|
|
539
|
+
|
|
540
|
+
export {
|
|
541
|
+
deploymentConfigPrompts,
|
|
542
|
+
frameworkPrompts,
|
|
543
|
+
enginePrompts,
|
|
544
|
+
frameworkVersionPrompts,
|
|
545
|
+
frameworkProfilePrompts,
|
|
546
|
+
modelFormatPrompts,
|
|
547
|
+
modelServerPrompts,
|
|
548
|
+
modelLoadStrategyPrompts,
|
|
549
|
+
modelProfilePrompts,
|
|
550
|
+
hfTokenPrompts,
|
|
551
|
+
ngcApiKeyPrompts
|
|
552
|
+
};
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Project prompt definitions.
|
|
6
|
+
* Covers: project name, destination directory.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Generate a pseudo-randomized project name of the form
 * `<adjective>-<framework alias>-<suffix>`.
 *
 * Each part is drawn with Math.random(), so the result is not reproducible
 * and not suitable for anything security sensitive.
 *
 * @param {string} framework - The ML framework (sklearn, xgboost, tensorflow,
 *   transformers). Any other value, including undefined, yields the generic
 *   alias 'ml'.
 * @returns {string} Generated project name
 */
function generateProjectName(framework) {
    const adjectives = [
        'smart', 'fast', 'clever', 'bright', 'swift', 'agile', 'sharp', 'quick',
        'wise', 'keen', 'bold', 'sleek', 'neat', 'cool', 'fresh', 'prime'
    ];

    const frameworkNames = {
        'sklearn': ['sklearn', 'scikit', 'sk'],
        'xgboost': ['xgb', 'xgboost', 'boost'],
        'tensorflow': ['tf', 'tensorflow', 'tensor'],
        'transformers': ['llm', 'transformer', 'gpt', 'bert', 'ai']
    };

    const suffixes = [
        'model', 'predictor', 'classifier', 'engine', 'service', 'api',
        'container', 'deployment', 'inference', 'ml', 'ai', 'bot'
    ];

    // Uniformly pick one element of a non-empty array.
    const pickRandom = (items) => items[Math.floor(Math.random() * items.length)];

    const adjective = pickRandom(adjectives);

    // Array.isArray (rather than a truthiness test) keeps inherited keys such
    // as 'constructor' or 'toString' from being treated as alias lists, which
    // used to produce names containing "undefined".
    const aliases = frameworkNames[framework];
    const frameworkName = Array.isArray(aliases) ? pickRandom(aliases) : 'ml';

    const suffix = pickRandom(suffixes);

    return `${adjective}-${frameworkName}-${suffix}`;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Project name prompt.
 *
 * The default is a generated name based on `answers.framework`, or on the
 * first segment of `answers.deploymentConfig` when no framework is set.
 * Accepted names are 2-63 characters of lowercase letters, digits and
 * hyphens, and must start and end with a letter or digit.
 */
const projectPrompts = [
    {
        type: 'input',
        name: 'projectName',
        message: 'What is the Project Name?',
        default: ({ framework, deploymentConfig }) =>
            generateProjectName(framework || deploymentConfig?.split('-')[0]),
        validate: (input) => {
            // Rules are checked in order; the first broken rule supplies the message.
            const rules = [
                [
                    (name) => !name || name.length < 2,
                    'Project name must be at least 2 characters.'
                ],
                [
                    (name) => name.length > 63,
                    'Project name must be 63 characters or fewer.'
                ],
                [
                    (name) => !/^[a-z0-9][a-z0-9-]*[a-z0-9]$/.test(name),
                    'Project name must be lowercase alphanumeric with hyphens (e.g. "qwen3-0-6b-v1-test"). No uppercase, dots, or underscores.'
                ]
            ];

            const broken = rules.find(([isBroken]) => isBroken(input));
            return broken ? broken[1] : true;
        }
    }
];
|
|
66
|
+
|
|
67
|
+
/**
 * Output directory prompt.
 *
 * The default is `./<projectName>-<timestamp>`, where the timestamp is the
 * first 19 characters of the current ISO-8601 time with ':' and '.' replaced
 * by '-' (for example 2024-01-31T12-00-00).
 */
const destinationPrompts = [
    {
        type: 'input',
        name: 'destinationDir',
        message: 'Where will the output directory be?',
        default: ({ projectName }) => {
            // Replacement is one character for one, so trimming first gives the same text.
            const stamp = new Date().toISOString().slice(0, 19).replace(/[:.]/g, '-');
            return `./${projectName}-${stamp}`;
        }
    }
];
|
|
78
|
+
|
|
79
|
+
export {
|
|
80
|
+
projectPrompts,
|
|
81
|
+
destinationPrompts
|
|
82
|
+
};
|