@aws/ml-container-creator 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +45 -4
- package/config/bootstrap-stack.json +14 -0
- package/infra/ci-harness/package-lock.json +22 -9
- package/package.json +7 -8
- package/servers/base-image-picker/index.js +3 -3
- package/servers/base-image-picker/manifest.json +4 -2
- package/servers/instance-sizer/index.js +564 -0
- package/servers/instance-sizer/lib/instance-ranker.js +270 -0
- package/servers/instance-sizer/lib/model-resolver.js +269 -0
- package/servers/instance-sizer/lib/vram-estimator.js +177 -0
- package/servers/instance-sizer/manifest.json +17 -0
- package/servers/instance-sizer/package.json +15 -0
- package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
- package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
- package/servers/lib/catalogs/model-sizes.json +131 -0
- package/servers/lib/catalogs/models.json +632 -0
- package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
- package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
- package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
- package/servers/lib/schemas/image-catalog.schema.json +6 -12
- package/servers/lib/schemas/instances.schema.json +29 -0
- package/servers/lib/schemas/model-catalog.schema.json +12 -10
- package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
- package/servers/model-picker/index.js +4 -4
- package/servers/model-picker/manifest.json +2 -3
- package/servers/region-picker/index.js +1 -1
- package/servers/region-picker/manifest.json +1 -1
- package/src/app.js +36 -0
- package/src/lib/architecture-sync.js +171 -0
- package/src/lib/arn-detection.js +22 -0
- package/src/lib/bootstrap-command-handler.js +120 -0
- package/src/lib/cli-handler.js +3 -3
- package/src/lib/config-manager.js +47 -1
- package/src/lib/configuration-manager.js +2 -2
- package/src/lib/cross-cutting-checker.js +460 -0
- package/src/lib/deployment-entry-schema.js +1 -2
- package/src/lib/dry-run-validator.js +78 -0
- package/src/lib/generation-validator.js +102 -0
- package/src/lib/mcp-validator-config.js +89 -0
- package/src/lib/payload-builder.js +153 -0
- package/src/lib/prompt-runner.js +866 -149
- package/src/lib/prompts.js +2 -2
- package/src/lib/registry-command-handler.js +236 -0
- package/src/lib/registry-loader.js +5 -5
- package/src/lib/schema-sync.js +203 -0
- package/src/lib/schema-validation-engine.js +195 -0
- package/src/lib/secret-classification.js +56 -0
- package/src/lib/secrets-command-handler.js +550 -0
- package/src/lib/service-model-parser.js +102 -0
- package/src/lib/validate-runner.js +216 -0
- package/src/lib/validation-report.js +140 -0
- package/src/lib/validators/base-validator.js +36 -0
- package/src/lib/validators/catalog-validator.js +177 -0
- package/src/lib/validators/enum-validator.js +120 -0
- package/src/lib/validators/required-field-validator.js +150 -0
- package/src/lib/validators/type-validator.js +313 -0
- package/src/prompt-adapter.js +3 -2
- package/templates/Dockerfile +1 -1
- package/templates/do/build +37 -5
- package/templates/do/config +15 -3
- package/templates/do/deploy +60 -5
- package/templates/do/logs +18 -3
- package/templates/do/run +15 -1
- package/templates/do/validate +61 -0
- package/servers/instance-recommender/LICENSE +0 -202
- package/servers/instance-recommender/index.js +0 -284
- package/servers/instance-recommender/manifest.json +0 -16
- package/servers/instance-recommender/package.json +0 -15
- /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
- /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
package/src/lib/prompt-runner.js
CHANGED
|
@@ -17,8 +17,6 @@ import {
|
|
|
17
17
|
modelServerPrompts,
|
|
18
18
|
modelLoadStrategyPrompts,
|
|
19
19
|
modelProfilePrompts,
|
|
20
|
-
hfTokenPrompts,
|
|
21
|
-
ngcApiKeyPrompts,
|
|
22
20
|
modulePrompts,
|
|
23
21
|
infraRegionAndTargetPrompts,
|
|
24
22
|
infraInstancePrompts,
|
|
@@ -35,9 +33,13 @@ import {
|
|
|
35
33
|
|
|
36
34
|
import fs from 'fs';
|
|
37
35
|
import path from 'path';
|
|
36
|
+
import { execSync } from 'node:child_process';
|
|
38
37
|
import { fileURLToPath } from 'node:url';
|
|
39
38
|
import RegistryLoader from './registry-loader.js';
|
|
40
39
|
import { runPrompts } from '../prompt-adapter.js';
|
|
40
|
+
import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
|
|
41
|
+
import { isSecretsManagerArn } from './arn-detection.js';
|
|
42
|
+
import BootstrapConfig from './bootstrap-config.js';
|
|
41
43
|
|
|
42
44
|
const __pr_filename = fileURLToPath(import.meta.url);
|
|
43
45
|
const __pr_dirname = path.dirname(__pr_filename);
|
|
@@ -54,6 +56,14 @@ export default class PromptRunner {
|
|
|
54
56
|
|
|
55
57
|
/**
|
|
56
58
|
* Runs all prompting phases and returns combined answers
|
|
59
|
+
*
|
|
60
|
+
* Phase ordering (MCP Catalog Consolidation):
|
|
61
|
+
* Phase 1 (What): deployment config + model name/ID + quantization
|
|
62
|
+
* Phase 2 (How): deployment target + serving profile + base image
|
|
63
|
+
* Phase 3 (Where): region + instance-sizer query + instance type + CUDA/AMI auto-resolution + HyperPod + build target
|
|
64
|
+
* Phase 4 (Details): framework version, model profile, modules
|
|
65
|
+
* Phase 5 (Project): project name + destination
|
|
66
|
+
*
|
|
57
67
|
* @returns {Promise<Object>} Combined answers from all phases
|
|
58
68
|
*/
|
|
59
69
|
async run() {
|
|
@@ -70,39 +80,187 @@ export default class PromptRunner {
|
|
|
70
80
|
// Get only explicit configuration (not defaults) for prompt skipping
|
|
71
81
|
const explicitConfig = this.configManager ? this.configManager.getExplicitConfiguration() : {};
|
|
72
82
|
|
|
73
|
-
//
|
|
74
|
-
//
|
|
75
|
-
//
|
|
83
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
84
|
+
// Phase 1 — What (deployment config + model name/ID + quantization)
|
|
85
|
+
// Requirements: 4.1, 4.2 — model selection drives instance sizing
|
|
86
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
87
|
+
console.log('\n🔧 Core ML Configuration');
|
|
88
|
+
const deploymentConfigAnswers = await this._runPhase(deploymentConfigPrompts, {}, explicitConfig, existingConfig);
|
|
89
|
+
|
|
90
|
+
// Derive architecture, backend, and legacy framework/modelServer from deploymentConfig
|
|
91
|
+
let architecture, backend, framework, modelServer;
|
|
92
|
+
if (deploymentConfigAnswers.deploymentConfig) {
|
|
93
|
+
const parts = deploymentConfigAnswers.deploymentConfig.split('-');
|
|
94
|
+
architecture = parts[0];
|
|
95
|
+
backend = parts.slice(1).join('-');
|
|
96
|
+
// Legacy compatibility: derive framework and modelServer
|
|
97
|
+
framework = architecture;
|
|
98
|
+
modelServer = backend;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Add derived values to answers
|
|
102
|
+
const frameworkAnswers = {
|
|
103
|
+
...deploymentConfigAnswers,
|
|
104
|
+
architecture: architecture || deploymentConfigAnswers.architecture,
|
|
105
|
+
backend: backend || deploymentConfigAnswers.backend,
|
|
106
|
+
framework: framework || deploymentConfigAnswers.framework,
|
|
107
|
+
modelServer: modelServer || deploymentConfigAnswers.modelServer
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
// Engine prompt for http architecture
|
|
111
|
+
const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
|
|
112
|
+
|
|
113
|
+
// Auto-set model format for Triton backends with single format
|
|
114
|
+
const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
|
|
115
|
+
|
|
116
|
+
// Query model-picker MCP server for model choices
|
|
117
|
+
this._queryMcpForModels(frameworkAnswers.architecture);
|
|
118
|
+
if (this._mcpModelChoices) {
|
|
119
|
+
console.log(' 🔍 Querying model-picker...');
|
|
120
|
+
console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
|
|
121
|
+
}
|
|
122
|
+
const modelFormatPreviousAnswers = {
|
|
123
|
+
...frameworkAnswers,
|
|
124
|
+
...engineAnswers,
|
|
125
|
+
...(this._mcpModelChoices ? { _mcpModelChoices: this._mcpModelChoices } : {})
|
|
126
|
+
};
|
|
127
|
+
const modelFormatAnswers = await this._runPhase(
|
|
128
|
+
modelFormatPrompts,
|
|
129
|
+
modelFormatPreviousAnswers,
|
|
130
|
+
explicitConfig,
|
|
131
|
+
existingConfig
|
|
132
|
+
);
|
|
133
|
+
|
|
134
|
+
// Model server prompts are now deprecated (empty array)
|
|
135
|
+
const modelServerAnswers = await this._runPhase(
|
|
136
|
+
modelServerPrompts,
|
|
137
|
+
{...frameworkAnswers, ...engineAnswers},
|
|
138
|
+
explicitConfig,
|
|
139
|
+
existingConfig
|
|
140
|
+
);
|
|
141
|
+
|
|
142
|
+
// Resolve model ID early for instance-sizer query in Phase 3
|
|
143
|
+
const phase1ModelId = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
|
|
144
|
+
|
|
145
|
+
// Fetch model information from HuggingFace and Model Registry
|
|
146
|
+
if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
|
|
147
|
+
await this._fetchAndDisplayModelInfo(phase1ModelId);
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
151
|
+
// Phase 2 — How (deployment target + serving profile + base image)
|
|
152
|
+
// Requirements: 4.3 — instance prompt appears AFTER base image is known
|
|
153
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
76
154
|
console.log('\n💪 Infrastructure & Deployment');
|
|
77
155
|
|
|
78
|
-
//
|
|
79
|
-
await this._queryMcpForRegion({}, explicitConfig);
|
|
156
|
+
// 2a. Deployment target (realtime, async, batch, hyperpod, local)
|
|
80
157
|
const bootstrapRegion = existingConfig.awsRegion || explicitConfig.awsRegion;
|
|
81
158
|
const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
|
|
82
|
-
const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, regionPreviousAnswers, explicitConfig, existingConfig);
|
|
159
|
+
const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
|
|
160
|
+
|
|
161
|
+
// 2b. Query base-image-picker MCP server for base image choices
|
|
162
|
+
await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
|
|
163
|
+
const baseImagePreviousAnswers = {
|
|
164
|
+
...frameworkAnswers,
|
|
165
|
+
...engineAnswers,
|
|
166
|
+
...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
|
|
167
|
+
};
|
|
168
|
+
const baseImageAnswers = await this._runPhase(
|
|
169
|
+
baseImagePrompts,
|
|
170
|
+
baseImagePreviousAnswers,
|
|
171
|
+
explicitConfig,
|
|
172
|
+
existingConfig
|
|
173
|
+
);
|
|
174
|
+
|
|
175
|
+
// Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
|
|
176
|
+
this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
|
|
83
177
|
|
|
84
|
-
//
|
|
178
|
+
// Extract CUDA version from selected base image for instance-sizer context
|
|
179
|
+
const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
|
|
180
|
+
|
|
181
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
182
|
+
// Phase 3 — Where (region + instance [derived] + CUDA/AMI + HyperPod + build target)
|
|
183
|
+
// Requirements: 4.4, 4.5, 4.7, 3.6, 3.7 — sizer query with full context
|
|
184
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
185
|
+
|
|
186
|
+
// 3a. Region query
|
|
187
|
+
await this._queryMcpForRegion(frameworkAnswers, explicitConfig);
|
|
188
|
+
|
|
189
|
+
// 3b. Instance type — query instance-sizer with full context (model + profile + CUDA)
|
|
85
190
|
let instanceAnswers = {};
|
|
86
|
-
|
|
191
|
+
const needsInstance = regionAndTargetAnswers.deploymentTarget === 'realtime-inference' ||
|
|
87
192
|
regionAndTargetAnswers.deploymentTarget === 'async-inference' ||
|
|
88
193
|
regionAndTargetAnswers.deploymentTarget === 'batch-transform' ||
|
|
89
|
-
regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks'
|
|
90
|
-
|
|
91
|
-
|
|
194
|
+
regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks';
|
|
195
|
+
|
|
196
|
+
if (needsInstance) {
|
|
197
|
+
// Determine architecture type for heuristic fallback
|
|
198
|
+
const modelArchitecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
199
|
+
|
|
200
|
+
// Skip sizer query if --instance-type was provided via CLI
|
|
201
|
+
if (!explicitConfig.instanceType) {
|
|
202
|
+
// Skip sizer for predictor and http architectures (default to a CPU instance)
|
|
203
|
+
if (modelArchitecture === 'predictor' || modelArchitecture === 'http') {
|
|
204
|
+
// Architecture heuristic: predictor/http → ml.m5.large
|
|
205
|
+
console.log(' ℹ️ Predictor model: defaulting to CPU instance (ml.m5.large)');
|
|
206
|
+
this._architectureHeuristicDefault = 'ml.m5.large';
|
|
207
|
+
} else if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
|
|
208
|
+
// Query instance-sizer with full context
|
|
209
|
+
await this._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
|
|
210
|
+
cudaVersion: selectedBaseImageCuda,
|
|
211
|
+
profileEnvVars: this._selectedProfileEnvVars || {}
|
|
212
|
+
});
|
|
213
|
+
} else {
|
|
214
|
+
// No resolved model ID (or only the custom-entry placeholder) — fall back to _queryMcpForInstance
|
|
215
|
+
await this._queryMcpForInstance(frameworkAnswers, explicitConfig);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// Build instance prompt choices from sizer results
|
|
220
|
+
const mcpInstanceChoices = this._mcpInstanceSizerChoices || this.configManager?.mcpChoices?.instanceType;
|
|
92
221
|
const instancePreviousAnswers = {
|
|
93
222
|
...regionAndTargetAnswers,
|
|
94
|
-
...(mcpInstanceChoices && mcpInstanceChoices.length > 0 ? { _mcpInstanceChoices: mcpInstanceChoices } : {})
|
|
223
|
+
...(mcpInstanceChoices && mcpInstanceChoices.length > 0 ? { _mcpInstanceChoices: mcpInstanceChoices } : {}),
|
|
224
|
+
...(this._architectureHeuristicDefault ? { _architectureHeuristicDefault: this._architectureHeuristicDefault } : {})
|
|
95
225
|
};
|
|
96
226
|
instanceAnswers = await this._runPhase(infraInstancePrompts, instancePreviousAnswers, explicitConfig, existingConfig);
|
|
227
|
+
|
|
228
|
+
// Apply architecture heuristic fallback when sizer returns empty
|
|
229
|
+
if (!instanceAnswers.instanceType && !explicitConfig.instanceType && this._architectureHeuristicDefault) {
|
|
230
|
+
instanceAnswers.instanceType = this._architectureHeuristicDefault;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// In auto-prompt mode, use instance-sizer's top recommendation as the instance type
|
|
235
|
+
if (this.configManager?.isAutoPrompt() && this._mcpInstanceSizerChoices && this._mcpInstanceSizerChoices.length > 0) {
|
|
236
|
+
const sizerRecommendation = this._mcpInstanceSizerChoices[0];
|
|
237
|
+
if (!explicitConfig.instanceType) {
|
|
238
|
+
instanceAnswers.instanceType = sizerRecommendation;
|
|
239
|
+
console.log(` ✓ Auto-prompt: using instance-sizer recommendation: ${sizerRecommendation}`);
|
|
240
|
+
}
|
|
97
241
|
}
|
|
98
242
|
|
|
99
|
-
//
|
|
243
|
+
// Auto-set tensor parallelism when the sizer recommendation for the chosen instance (or, if none matches, the top recommendation) has TP > 1
|
|
244
|
+
// Requirements: 4.8
|
|
245
|
+
if (this._instanceSizerMetadata) {
|
|
246
|
+
const sizerRecs = this._instanceSizerMetadata.recommendations || [];
|
|
247
|
+
const finalInstanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
|
|
248
|
+
const matchingRec = sizerRecs.find(r => r.instanceType === finalInstanceType);
|
|
249
|
+
const tpRec = matchingRec || sizerRecs[0];
|
|
250
|
+
if (tpRec && tpRec.tensorParallelism > 1) {
|
|
251
|
+
this._autoTensorParallelism = tpRec.tensorParallelism;
|
|
252
|
+
this._autoGpuCount = tpRec.gpuCount;
|
|
253
|
+
console.log(` ✓ Auto-set tensor parallelism: TP=${tpRec.tensorParallelism} (${tpRec.gpuCount} GPUs)`);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
|
|
100
258
|
let asyncAnswers = {};
|
|
101
259
|
if (regionAndTargetAnswers.deploymentTarget === 'async-inference') {
|
|
102
260
|
asyncAnswers = await this._runPhase(infraAsyncPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
|
|
103
261
|
}
|
|
104
262
|
|
|
105
|
-
//
|
|
263
|
+
// 3d. Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
|
|
106
264
|
let batchTransformAnswers = {};
|
|
107
265
|
if (regionAndTargetAnswers.deploymentTarget === 'batch-transform') {
|
|
108
266
|
batchTransformAnswers = await this._runPhase(
|
|
@@ -113,16 +271,24 @@ export default class PromptRunner {
|
|
|
113
271
|
);
|
|
114
272
|
}
|
|
115
273
|
|
|
116
|
-
//
|
|
274
|
+
// 3e. CUDA/AMI auto-resolution
|
|
275
|
+
const instanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
|
|
276
|
+
const cudaAnswer = await this._promptCudaVersion(
|
|
277
|
+
instanceType,
|
|
278
|
+
frameworkAnswers.framework,
|
|
279
|
+
null, // frameworkVersion not yet known in Phase 3
|
|
280
|
+
selectedBaseImageCuda // base image CUDA version for intersection
|
|
281
|
+
);
|
|
282
|
+
|
|
283
|
+
// 3f. HyperPod prompts — only query MCP and prompt when deployment target is hyperpod-eks
|
|
117
284
|
let hyperPodAnswers = {};
|
|
118
285
|
if (regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
|
|
119
|
-
// Resolve the actual region (handle 'custom' selection)
|
|
120
286
|
const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
|
|
121
287
|
await this._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
|
|
122
288
|
hyperPodAnswers = await this._runPhase(infraHyperPodPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
|
|
123
289
|
}
|
|
124
290
|
|
|
125
|
-
//
|
|
291
|
+
// 3g. Build target + role ARN (always)
|
|
126
292
|
const buildAnswers = await this._runPhase(infraBuildPrompts, { ...regionAndTargetAnswers, ...instanceAnswers, ...hyperPodAnswers }, explicitConfig, existingConfig);
|
|
127
293
|
|
|
128
294
|
// Combine all infrastructure answers
|
|
@@ -135,54 +301,16 @@ export default class PromptRunner {
|
|
|
135
301
|
...buildAnswers
|
|
136
302
|
};
|
|
137
303
|
|
|
138
|
-
//
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
// Derive architecture, backend, and legacy framework/modelServer from deploymentConfig
|
|
144
|
-
// Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7
|
|
145
|
-
let architecture, backend, framework, modelServer;
|
|
146
|
-
if (deploymentConfigAnswers.deploymentConfig) {
|
|
147
|
-
const parts = deploymentConfigAnswers.deploymentConfig.split('-');
|
|
148
|
-
architecture = parts[0];
|
|
149
|
-
backend = parts.slice(1).join('-');
|
|
150
|
-
// Legacy compatibility: derive framework and modelServer
|
|
151
|
-
framework = architecture;
|
|
152
|
-
modelServer = backend;
|
|
304
|
+
// Apply CUDA resolution to infra answers
|
|
305
|
+
if (cudaAnswer) {
|
|
306
|
+
infraAnswers._selectedCudaVersion = cudaAnswer.cudaVersion;
|
|
307
|
+
infraAnswers._resolvedInferenceAmiVersion = cudaAnswer.inferenceAmiVersion;
|
|
153
308
|
}
|
|
154
|
-
|
|
155
|
-
//
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
backend: backend || deploymentConfigAnswers.backend,
|
|
160
|
-
framework: framework || deploymentConfigAnswers.framework,
|
|
161
|
-
modelServer: modelServer || deploymentConfigAnswers.modelServer
|
|
162
|
-
};
|
|
163
|
-
|
|
164
|
-
// Engine prompt for http architecture
|
|
165
|
-
// Requirements: 3.7
|
|
166
|
-
const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
|
|
167
|
-
|
|
168
|
-
// Auto-set model format for Triton backends with single format
|
|
169
|
-
// Requirements: 3.3, 3.4, 3.5
|
|
170
|
-
const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
|
|
171
|
-
|
|
172
|
-
// Query base-image-picker MCP server for base image choices
|
|
173
|
-
// Requirements: 5.1, 5.2, 5.3
|
|
174
|
-
await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
|
|
175
|
-
const baseImagePreviousAnswers = {
|
|
176
|
-
...frameworkAnswers,
|
|
177
|
-
...engineAnswers,
|
|
178
|
-
...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
|
|
179
|
-
};
|
|
180
|
-
const baseImageAnswers = await this._runPhase(
|
|
181
|
-
baseImagePrompts,
|
|
182
|
-
baseImagePreviousAnswers,
|
|
183
|
-
explicitConfig,
|
|
184
|
-
existingConfig
|
|
185
|
-
);
|
|
309
|
+
|
|
310
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
311
|
+
// Phase 4 — Details (framework version, model profile, modules)
|
|
312
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
313
|
+
console.log('\n📦 Module Selection');
|
|
186
314
|
|
|
187
315
|
// Populate framework version choices from registry
|
|
188
316
|
const frameworkVersionChoices = this._getFrameworkVersionChoices(frameworkAnswers.framework);
|
|
@@ -209,44 +337,10 @@ export default class PromptRunner {
|
|
|
209
337
|
explicitConfig,
|
|
210
338
|
existingConfig
|
|
211
339
|
);
|
|
212
|
-
|
|
213
|
-
// Query model-picker MCP server for model choices
|
|
214
|
-
this._queryMcpForModels(frameworkAnswers.architecture);
|
|
215
|
-
if (this._mcpModelChoices) {
|
|
216
|
-
console.log(' 🔍 Querying model-picker...');
|
|
217
|
-
console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
|
|
218
|
-
}
|
|
219
|
-
const modelFormatPreviousAnswers = {
|
|
220
|
-
...frameworkAnswers,
|
|
221
|
-
...engineAnswers,
|
|
222
|
-
...frameworkVersionAnswers,
|
|
223
|
-
...frameworkProfileAnswers,
|
|
224
|
-
...(this._mcpModelChoices ? { _mcpModelChoices: this._mcpModelChoices } : {})
|
|
225
|
-
};
|
|
226
|
-
const modelFormatAnswers = await this._runPhase(
|
|
227
|
-
modelFormatPrompts,
|
|
228
|
-
modelFormatPreviousAnswers,
|
|
229
|
-
explicitConfig,
|
|
230
|
-
existingConfig
|
|
231
|
-
);
|
|
232
|
-
|
|
233
|
-
// Model server prompts are now deprecated (empty array)
|
|
234
|
-
const modelServerAnswers = await this._runPhase(
|
|
235
|
-
modelServerPrompts,
|
|
236
|
-
{...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers},
|
|
237
|
-
explicitConfig,
|
|
238
|
-
existingConfig
|
|
239
|
-
);
|
|
240
|
-
|
|
340
|
+
|
|
241
341
|
// Populate model profile choices from registry (if model ID is available)
|
|
342
|
+
const modelId = phase1ModelId;
|
|
242
343
|
const currentAnswers = {...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers};
|
|
243
|
-
const modelId = currentAnswers.customModelName || currentAnswers.modelName || explicitConfig.modelName;
|
|
244
|
-
|
|
245
|
-
// Fetch model information from HuggingFace and Model Registry
|
|
246
|
-
// Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
|
|
247
|
-
if (modelId && modelId !== 'Custom (enter manually)') {
|
|
248
|
-
await this._fetchAndDisplayModelInfo(modelId);
|
|
249
|
-
}
|
|
250
344
|
|
|
251
345
|
const modelProfileChoices = this._getModelProfileChoices(modelId);
|
|
252
346
|
const modelProfileAnswers = await this._runPhase(
|
|
@@ -257,7 +351,6 @@ export default class PromptRunner {
|
|
|
257
351
|
);
|
|
258
352
|
|
|
259
353
|
// Model loading strategy prompt (build-time vs runtime)
|
|
260
|
-
// Requirements: 13.1, 13.2, 13.3, 13.4, 13.5
|
|
261
354
|
const modelLoadStrategyAnswers = await this._runPhase(
|
|
262
355
|
modelLoadStrategyPrompts,
|
|
263
356
|
{ ...frameworkAnswers, ...engineAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
|
|
@@ -265,40 +358,13 @@ export default class PromptRunner {
|
|
|
265
358
|
existingConfig
|
|
266
359
|
);
|
|
267
360
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
const ngcApiKeyAnswers =
|
|
273
|
-
{ ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
|
|
274
|
-
explicitConfig, existingConfig);
|
|
275
|
-
|
|
276
|
-
// Validate instance type against framework requirements (now that framework is known)
|
|
277
|
-
// Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
|
|
278
|
-
const instanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
|
|
279
|
-
if (instanceType && frameworkVersionAnswers.frameworkVersion) {
|
|
280
|
-
await this._validateAndDisplayInstanceType(
|
|
281
|
-
instanceType,
|
|
282
|
-
frameworkAnswers.framework,
|
|
283
|
-
frameworkVersionAnswers.frameworkVersion
|
|
284
|
-
);
|
|
285
|
-
}
|
|
361
|
+
// Secret prompts — registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
|
|
362
|
+
const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
|
|
363
|
+
const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
|
|
364
|
+
const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
|
|
365
|
+
const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
|
|
286
366
|
|
|
287
|
-
//
|
|
288
|
-
// let the user pick which one. This transparently sets the inference AMI version.
|
|
289
|
-
const cudaAnswer = await this._promptCudaVersion(
|
|
290
|
-
instanceType,
|
|
291
|
-
frameworkAnswers.framework,
|
|
292
|
-
frameworkVersionAnswers.frameworkVersion
|
|
293
|
-
);
|
|
294
|
-
if (cudaAnswer) {
|
|
295
|
-
infraAnswers._selectedCudaVersion = cudaAnswer.cudaVersion;
|
|
296
|
-
infraAnswers._resolvedInferenceAmiVersion = cudaAnswer.inferenceAmiVersion;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
// Phase 3: Module Selection
|
|
300
|
-
// Requirements: 3.3 — module selection after ML configuration
|
|
301
|
-
console.log('\n📦 Module Selection');
|
|
367
|
+
// Module selection
|
|
302
368
|
const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
|
|
303
369
|
|
|
304
370
|
// Ensure transformers, diffusors, and ineligible Triton backends don't get sample model
|
|
@@ -309,8 +375,19 @@ export default class PromptRunner {
|
|
|
309
375
|
moduleAnswers.includeSampleModel = false;
|
|
310
376
|
}
|
|
311
377
|
|
|
312
|
-
//
|
|
313
|
-
|
|
378
|
+
// Validate instance type against framework requirements (now that framework version is known)
|
|
379
|
+
const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
|
|
380
|
+
if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
|
|
381
|
+
await this._validateAndDisplayInstanceType(
|
|
382
|
+
finalInstanceType,
|
|
383
|
+
frameworkAnswers.framework,
|
|
384
|
+
frameworkVersionAnswers.frameworkVersion
|
|
385
|
+
);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
389
|
+
// Phase 5 — Project (project name + destination)
|
|
390
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
314
391
|
console.log('\n📋 Project Configuration');
|
|
315
392
|
const allTechnicalAnswers = {
|
|
316
393
|
...frameworkAnswers,
|
|
@@ -439,6 +516,21 @@ export default class PromptRunner {
|
|
|
439
516
|
delete combinedAnswers.customInstanceType;
|
|
440
517
|
}
|
|
441
518
|
|
|
519
|
+
// Propagate tensor parallelism from instance-sizer to templates
|
|
520
|
+
// Requirements: 4.8 — auto-set TP when sizer recommends > 1
|
|
521
|
+
if (this._autoTensorParallelism) {
|
|
522
|
+
combinedAnswers.tensorParallelSize = this._autoTensorParallelism;
|
|
523
|
+
combinedAnswers.gpuCount = this._autoGpuCount;
|
|
524
|
+
} else if (this._instanceSizerMetadata) {
|
|
525
|
+
const sizerInstanceType = combinedAnswers.instanceType;
|
|
526
|
+
const sizerRecs = this._instanceSizerMetadata.recommendations || [];
|
|
527
|
+
const matchingRec = sizerRecs.find(r => r.instanceType === sizerInstanceType);
|
|
528
|
+
if (matchingRec && matchingRec.tensorParallelism > 1) {
|
|
529
|
+
combinedAnswers.tensorParallelSize = matchingRec.tensorParallelism;
|
|
530
|
+
combinedAnswers.gpuCount = matchingRec.gpuCount;
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
|
|
442
534
|
// Handle custom HyperPod cluster name
|
|
443
535
|
if (combinedAnswers.customHyperPodCluster) {
|
|
444
536
|
combinedAnswers.hyperPodCluster = combinedAnswers.customHyperPodCluster;
|
|
@@ -623,6 +715,118 @@ export default class PromptRunner {
|
|
|
623
715
|
return null;
|
|
624
716
|
}
|
|
625
717
|
|
|
718
|
+
/**
|
|
719
|
+
* Extract CUDA version from the selected base image.
|
|
720
|
+
* Looks at the matching MCP base image choice's _meta: labels.cuda_version first, then accelerator.version.
|
|
721
|
+
* @param {object} baseImageAnswers - Answers from the base image prompt
|
|
722
|
+
* @returns {string|null} CUDA version string (e.g., "12.1") or null
|
|
723
|
+
* @private
|
|
724
|
+
*/
|
|
725
|
+
_extractCudaFromBaseImage(baseImageAnswers) {
|
|
726
|
+
if (!this._mcpBaseImageChoices) return null;
|
|
727
|
+
|
|
728
|
+
const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
|
|
729
|
+
if (!selectedImage) return null;
|
|
730
|
+
|
|
731
|
+
// Find the matching entry in the MCP choices
|
|
732
|
+
const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
|
|
733
|
+
if (!matchingChoice) return null;
|
|
734
|
+
|
|
735
|
+
// Try to extract CUDA version from the choice metadata
|
|
736
|
+
// The formatImageChoices function stores labels in the choice object
|
|
737
|
+
if (matchingChoice._meta?.labels?.cuda_version) {
|
|
738
|
+
return matchingChoice._meta.labels.cuda_version;
|
|
739
|
+
}
|
|
740
|
+
if (matchingChoice._meta?.accelerator?.version) {
|
|
741
|
+
return matchingChoice._meta.accelerator.version;
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
return null;
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
/**
|
|
748
|
+
* Check model architecture compatibility against the selected base image.
|
|
749
|
+
* Emits an advisory warning if the model's model_type is not in the server's
|
|
750
|
+
* supportedModelTypes. Skips silently if supportedModelTypes is empty (sync not run).
|
|
751
|
+
* Requirements: 4.2, 4.3, 4.4, 4.5
|
|
752
|
+
* @param {Object} baseImageAnswers - Answers from base image selection phase
|
|
753
|
+
* @param {Object} frameworkAnswers - Answers from framework/deployment config phase
|
|
754
|
+
* @private
|
|
755
|
+
*/
|
|
756
|
+
_checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers) {
|
|
757
|
+
// Requirement 4.5: skip if no model_type was resolved
|
|
758
|
+
if (!this._modelType) return;
|
|
759
|
+
|
|
760
|
+
// Determine the selected image
|
|
761
|
+
const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
|
|
762
|
+
if (!selectedImage || selectedImage === 'custom') return;
|
|
763
|
+
|
|
764
|
+
// Resolve the matching choice from MCP base image choices
|
|
765
|
+
if (!this._mcpBaseImageChoices) return;
|
|
766
|
+
const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
|
|
767
|
+
if (!matchingChoice) return;
|
|
768
|
+
|
|
769
|
+
// Determine the server name from framework answers
|
|
770
|
+
const server = frameworkAnswers.modelServer || frameworkAnswers.backend;
|
|
771
|
+
if (!server) return;
|
|
772
|
+
|
|
773
|
+
// Load the model-servers catalog to find the entry with supportedModelTypes
|
|
774
|
+
try {
|
|
775
|
+
const catalogPath = path.resolve(GENERATOR_ROOT, 'servers', 'lib', 'catalogs', 'model-servers.json');
|
|
776
|
+
const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
|
|
777
|
+
|
|
778
|
+
const serverEntries = catalog[server];
|
|
779
|
+
if (!Array.isArray(serverEntries)) return;
|
|
780
|
+
|
|
781
|
+
// Find the catalog entry matching the selected image
|
|
782
|
+
const entry = serverEntries.find(e => e.image === selectedImage);
|
|
783
|
+
if (!entry) return;
|
|
784
|
+
|
|
785
|
+
const supported = entry.supportedModelTypes;
|
|
786
|
+
// Requirement 4.5: skip silently when supportedModelTypes is empty (sync not run)
|
|
787
|
+
if (!supported || supported.length === 0) return;
|
|
788
|
+
|
|
789
|
+
// Requirement 4.2-4.3: cross-reference model_type (case-insensitive)
|
|
790
|
+
const modelTypeLower = this._modelType.toLowerCase();
|
|
791
|
+
if (!supported.includes(modelTypeLower)) {
|
|
792
|
+
const version = entry.labels?.framework_version || entry.tag || 'unknown';
|
|
793
|
+
const docsUrls = {
|
|
794
|
+
vllm: 'https://docs.vllm.ai/en/latest/models/supported_models.html',
|
|
795
|
+
sglang: 'https://sgl-project.github.io/references/supported_models.html',
|
|
796
|
+
'tensorrt-llm': 'https://nvidia.github.io/TensorRT-LLM/reference/support-matrix.html'
|
|
797
|
+
};
|
|
798
|
+
const docsUrl = docsUrls[server] || `https://github.com/search?q=${server}+supported+models`;
|
|
799
|
+
|
|
800
|
+
// Requirement 4.3-4.4: emit advisory warning (does not block generation)
|
|
801
|
+
console.log(`\n ⚠️ Model architecture "${this._modelType}" may not be supported by ${server} ${version}`);
|
|
802
|
+
console.log(' Consider upgrading to a newer base image, or verify compatibility at:');
|
|
803
|
+
console.log(` ${docsUrl}`);
|
|
804
|
+
}
|
|
805
|
+
} catch (err) {
|
|
806
|
+
// Graceful degradation: if catalog can't be read, skip silently
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
/**
|
|
811
|
+
* Get architecture-based heuristic default instance type.
|
|
812
|
+
* Used when the instance-sizer cannot produce a recommendation.
|
|
813
|
+
* Requirements: 3.9, 4.6
|
|
814
|
+
* @param {string} architecture - Model architecture type
|
|
815
|
+
* @returns {string} Default instance type
|
|
816
|
+
* @private
|
|
817
|
+
*/
|
|
818
|
+
_getArchitectureHeuristicDefault(architecture) {
|
|
819
|
+
const HEURISTIC_DEFAULTS = {
|
|
820
|
+
'transformers': 'ml.g5.xlarge',
|
|
821
|
+
'transformer': 'ml.g5.xlarge',
|
|
822
|
+
'diffusors': 'ml.g5.2xlarge',
|
|
823
|
+
'diffusor': 'ml.g5.2xlarge',
|
|
824
|
+
'predictor': 'ml.m5.large',
|
|
825
|
+
'http': 'ml.m5.large'
|
|
826
|
+
};
|
|
827
|
+
return Object.hasOwn(HEURISTIC_DEFAULTS, architecture) ? HEURISTIC_DEFAULTS[architecture] : 'ml.g5.xlarge';
|
|
828
|
+
}
|
|
829
|
+
|
|
626
830
|
/**
|
|
627
831
|
* Query MCP region-picker server before infrastructure prompts.
|
|
628
832
|
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
@@ -671,8 +875,8 @@ export default class PromptRunner {
|
|
|
671
875
|
}
|
|
672
876
|
|
|
673
877
|
/**
|
|
674
|
-
* Query MCP instance-
|
|
675
|
-
*
|
|
878
|
+
* Query MCP instance-sizer server with tag-based search after deployment target is known.
|
|
879
|
+
* Used when no model name is available for VRAM-based sizing.
|
|
676
880
|
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
677
881
|
* @private
|
|
678
882
|
*/
|
|
@@ -686,7 +890,7 @@ export default class PromptRunner {
|
|
|
686
890
|
const smart = this.options.smart === true;
|
|
687
891
|
|
|
688
892
|
// Instance type: query if not already provided via CLI/config
|
|
689
|
-
if (!explicitConfig.instanceType && mcpServers.includes('instance-
|
|
893
|
+
if (!explicitConfig.instanceType && mcpServers.includes('instance-sizer')) {
|
|
690
894
|
const { instanceSearch } = await this._runPrompts([{
|
|
691
895
|
type: 'input',
|
|
692
896
|
name: 'instanceSearch',
|
|
@@ -695,8 +899,8 @@ export default class PromptRunner {
|
|
|
695
899
|
}]);
|
|
696
900
|
|
|
697
901
|
if (instanceSearch && instanceSearch.trim()) {
|
|
698
|
-
console.log(` 🔍 Querying instance-
|
|
699
|
-
const result = await cm.queryMcpServer('instance-
|
|
902
|
+
console.log(` 🔍 Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
|
|
903
|
+
const result = await cm.queryMcpServer('instance-sizer', {
|
|
700
904
|
...frameworkAnswers,
|
|
701
905
|
instanceSearch: instanceSearch.trim()
|
|
702
906
|
});
|
|
@@ -713,6 +917,150 @@ export default class PromptRunner {
|
|
|
713
917
|
}
|
|
714
918
|
}
|
|
715
919
|
|
|
920
|
+
/**
|
|
921
|
+
* Query the instance-sizer MCP server after model is known.
|
|
922
|
+
* Estimates VRAM requirements and returns filtered, ranked instance recommendations.
|
|
923
|
+
* Stores results in this._mcpInstanceSizerChoices and this._instanceSizerMetadata.
|
|
924
|
+
* Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
|
|
925
|
+
* @param {object} frameworkAnswers - Framework/architecture answers
|
|
926
|
+
* @param {object} modelFormatAnswers - Model format answers (contains modelName)
|
|
927
|
+
* @param {object} explicitConfig - Explicit CLI/config values
|
|
928
|
+
* @param {object} [sizerContext={}] - Additional context for the sizer query
|
|
929
|
+
* @param {string} [sizerContext.cudaVersion] - CUDA version from base image
|
|
930
|
+
* @param {object} [sizerContext.profileEnvVars] - Profile ENV overrides
|
|
931
|
+
* @private
|
|
932
|
+
*/
|
|
933
|
+
async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
|
|
934
|
+
const cm = this.configManager;
|
|
935
|
+
if (!cm) return;
|
|
936
|
+
|
|
937
|
+
const mcpServers = cm.getMcpServerNames();
|
|
938
|
+
if (!mcpServers.includes('instance-sizer')) return;
|
|
939
|
+
|
|
940
|
+
// Resolve model name from answers or explicit config
|
|
941
|
+
const modelName = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
|
|
942
|
+
if (!modelName || modelName === 'Custom (enter manually)') return;
|
|
943
|
+
|
|
944
|
+
const smart = this.options.smart === true;
|
|
945
|
+
const discover = this.options.discover === true;
|
|
946
|
+
|
|
947
|
+
const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
|
|
948
|
+
console.log(` 🔍 Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);
|
|
949
|
+
|
|
950
|
+
try {
|
|
951
|
+
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
952
|
+
if (!fs.existsSync(mcpConfigPath)) return;
|
|
953
|
+
|
|
954
|
+
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
955
|
+
const serverConfig = mcpConfig.mcpServers?.['instance-sizer'];
|
|
956
|
+
if (!serverConfig) return;
|
|
957
|
+
|
|
958
|
+
const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
|
|
959
|
+
const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
|
|
960
|
+
|
|
961
|
+
const serverArgs = [...(serverConfig.args || [])];
|
|
962
|
+
if (discover && !serverArgs.includes('--discover')) {
|
|
963
|
+
serverArgs.push('--discover');
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
const transport = new StdioClientTransport({
|
|
967
|
+
command: serverConfig.command,
|
|
968
|
+
args: serverArgs,
|
|
969
|
+
env: {
|
|
970
|
+
...process.env,
|
|
971
|
+
...(serverConfig.env || {}),
|
|
972
|
+
...(smart ? { BEDROCK_SMART: 'true' } : {})
|
|
973
|
+
},
|
|
974
|
+
stderr: 'pipe'
|
|
975
|
+
});
|
|
976
|
+
|
|
977
|
+
const mcpClient = new Client(
|
|
978
|
+
{ name: 'ml-container-creator', version: '1.0.0' },
|
|
979
|
+
{ capabilities: {} }
|
|
980
|
+
);
|
|
981
|
+
|
|
982
|
+
await mcpClient.connect(transport);
|
|
983
|
+
|
|
984
|
+
const toolArgs = {
|
|
985
|
+
modelName,
|
|
986
|
+
limit: 10,
|
|
987
|
+
context: {
|
|
988
|
+
architecture: frameworkAnswers.architecture || undefined,
|
|
989
|
+
backend: frameworkAnswers.backend || undefined,
|
|
990
|
+
deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
|
|
991
|
+
profileEnvVars: sizerContext.profileEnvVars || undefined
|
|
992
|
+
}
|
|
993
|
+
};
|
|
994
|
+
|
|
995
|
+
// Add CUDA version from base image for filtering
|
|
996
|
+
if (sizerContext.cudaVersion) {
|
|
997
|
+
toolArgs.cudaVersion = sizerContext.cudaVersion;
|
|
998
|
+
}
|
|
999
|
+
|
|
1000
|
+
// Add quantization if available from model format answers
|
|
1001
|
+
if (modelFormatAnswers.quantization) {
|
|
1002
|
+
toolArgs.quantization = modelFormatAnswers.quantization;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
const result = await mcpClient.callTool({
|
|
1006
|
+
name: 'get_instance_recommendation',
|
|
1007
|
+
arguments: toolArgs
|
|
1008
|
+
});
|
|
1009
|
+
|
|
1010
|
+
await mcpClient.close();
|
|
1011
|
+
|
|
1012
|
+
// Parse the response
|
|
1013
|
+
const textBlock = result?.content?.find(b => b.type === 'text');
|
|
1014
|
+
if (textBlock) {
|
|
1015
|
+
const parsed = JSON.parse(textBlock.text);
|
|
1016
|
+
|
|
1017
|
+
if (parsed.choices?.instanceType?.length > 0) {
|
|
1018
|
+
this._instanceSizerMetadata = parsed.metadata || null;
|
|
1019
|
+
|
|
1020
|
+
// Build display labels with VRAM estimate and utilization percentage
|
|
1021
|
+
const recommendations = parsed.metadata?.recommendations || [];
|
|
1022
|
+
const estimatedVramGb = parsed.metadata?.estimatedVramGb;
|
|
1023
|
+
|
|
1024
|
+
// Store choices with display labels for the instance prompt
|
|
1025
|
+
this._mcpInstanceSizerChoices = parsed.choices.instanceType;
|
|
1026
|
+
this._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
|
|
1027
|
+
name: rec.displayLabel || `${rec.instanceType} (${estimatedVramGb ? estimatedVramGb.toFixed(1) : '?'}GB / ${rec.totalVramGb || '?'}GB — ${rec.utilizationPercent || '?'}% utilization)`,
|
|
1028
|
+
value: rec.instanceType,
|
|
1029
|
+
short: rec.instanceType
|
|
1030
|
+
}));
|
|
1031
|
+
|
|
1032
|
+
const choices = parsed.choices.instanceType;
|
|
1033
|
+
const topRec = recommendations[0];
|
|
1034
|
+
const vramInfo = estimatedVramGb
|
|
1035
|
+
? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
|
|
1036
|
+
: '';
|
|
1037
|
+
|
|
1038
|
+
console.log(` ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
|
|
1039
|
+
// Display compact recommendation table
|
|
1040
|
+
for (const rec of recommendations) {
|
|
1041
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1042
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1043
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1044
|
+
console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
|
|
1045
|
+
}
|
|
1046
|
+
} else if (parsed.metadata?.warning) {
|
|
1047
|
+
console.log(` ⚠️ ${parsed.metadata.warning}`);
|
|
1048
|
+
} else {
|
|
1049
|
+
// Apply architecture heuristic fallback when sizer returns empty
|
|
1050
|
+
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
1051
|
+
this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
|
|
1052
|
+
console.log(` ↳ No instance-sizer results, using heuristic default: ${this._architectureHeuristicDefault}`);
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
} catch (err) {
|
|
1056
|
+
// Sizer unavailable — apply architecture heuristic fallback
|
|
1057
|
+
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
1058
|
+
this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
|
|
1059
|
+
console.log(` ⚠️ instance-sizer: ${err.message}`);
|
|
1060
|
+
console.log(` ↳ Using heuristic default: ${this._architectureHeuristicDefault}`);
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
|
|
716
1064
|
/**
|
|
717
1065
|
* Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
|
|
718
1066
|
* Populates configManager.mcpChoices.hyperPodCluster so _runPhase injects them into the list prompt.
|
|
@@ -1098,6 +1446,12 @@ export default class PromptRunner {
|
|
|
1098
1446
|
modelFamily = vals.family;
|
|
1099
1447
|
}
|
|
1100
1448
|
|
|
1449
|
+
// Extract model_type for architecture validation
|
|
1450
|
+
// Requirements: 4.1
|
|
1451
|
+
if (vals.model_type) {
|
|
1452
|
+
this._modelType = vals.model_type;
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1101
1455
|
// Extract model source metadata for loading adapter
|
|
1102
1456
|
// Requirements: 2.1, 2.2, 2.3, 2.4
|
|
1103
1457
|
if (vals.provider) {
|
|
@@ -1149,6 +1503,11 @@ export default class PromptRunner {
|
|
|
1149
1503
|
if (hfData.chatTemplate) {
|
|
1150
1504
|
chatTemplate = hfData.chatTemplate;
|
|
1151
1505
|
}
|
|
1506
|
+
// Extract model_type for architecture validation
|
|
1507
|
+
// Requirements: 4.1
|
|
1508
|
+
if (hfData.modelConfig?.model_type) {
|
|
1509
|
+
this._modelType = hfData.modelConfig.model_type;
|
|
1510
|
+
}
|
|
1152
1511
|
console.log(' ✅ Found on HuggingFace Hub');
|
|
1153
1512
|
} else {
|
|
1154
1513
|
console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
|
|
@@ -1276,6 +1635,332 @@ export default class PromptRunner {
|
|
|
1276
1635
|
}
|
|
1277
1636
|
}
|
|
1278
1637
|
|
|
1638
|
+
/**
|
|
1639
|
+
* Run secret prompts using the Secret_Classification registry.
|
|
1640
|
+
* For each secret type whose stages apply to the current context:
|
|
1641
|
+
* - Query for managed secrets of that type
|
|
1642
|
+
* - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
|
|
1643
|
+
* - If no managed secrets exist: fall back to existing plaintext prompt
|
|
1644
|
+
*
|
|
1645
|
+
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
|
|
1646
|
+
* @param {object} previousAnswers - Answers from previous prompt phases
|
|
1647
|
+
* @param {object} explicitConfig - Explicit CLI/config values
|
|
1648
|
+
* @param {object} existingConfig - Existing project configuration
|
|
1649
|
+
* @returns {Promise<object>} Object with token/ARN values keyed by config field names
|
|
1650
|
+
* @private
|
|
1651
|
+
*/
|
|
1652
|
+
async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
|
|
1653
|
+
const results = {};
|
|
1654
|
+
|
|
1655
|
+
for (const classification of SECRET_CLASSIFICATIONS) {
|
|
1656
|
+
// Check if this secret type's stages apply to the current context
|
|
1657
|
+
if (!this._secretStagesApply(classification, previousAnswers)) continue;
|
|
1658
|
+
|
|
1659
|
+
// Determine the config keys for this classification
|
|
1660
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1661
|
+
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
1662
|
+
|
|
1663
|
+
// Skip if ARN already provided via CLI flag
|
|
1664
|
+
if (explicitConfig[arnConfigKey]) {
|
|
1665
|
+
results[arnConfigKey] = explicitConfig[arnConfigKey];
|
|
1666
|
+
continue;
|
|
1667
|
+
}
|
|
1668
|
+
|
|
1669
|
+
// Skip if plaintext already provided via CLI flag
|
|
1670
|
+
if (explicitConfig[plaintextConfigKey]) {
|
|
1671
|
+
results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
|
|
1672
|
+
continue;
|
|
1673
|
+
}
|
|
1674
|
+
|
|
1675
|
+
// Query for existing managed secrets of this type
|
|
1676
|
+
const managedSecrets = await this._listManagedSecrets(classification.identifier);
|
|
1677
|
+
|
|
1678
|
+
if (managedSecrets.length > 0) {
|
|
1679
|
+
// Show selection list: managed secrets + plaintext entry + skip
|
|
1680
|
+
const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
|
|
1681
|
+
Object.assign(results, answer);
|
|
1682
|
+
} else {
|
|
1683
|
+
// Fall back to existing plaintext prompt
|
|
1684
|
+
const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
|
|
1685
|
+
Object.assign(results, answer);
|
|
1686
|
+
}
|
|
1687
|
+
}
|
|
1688
|
+
|
|
1689
|
+
return results;
|
|
1690
|
+
}
|
|
1691
|
+
|
|
1692
|
+
/**
|
|
1693
|
+
* Determine if a secret classification's stages apply to the current generation context.
|
|
1694
|
+
* Build-time secrets apply when the project involves a Docker build step.
|
|
1695
|
+
* Runtime secrets apply when the architecture uses HuggingFace Hub models.
|
|
1696
|
+
* Requirements: 8.9
|
|
1697
|
+
* @param {object} classification - Secret classification entry
|
|
1698
|
+
* @param {object} answers - Current answers from previous phases
|
|
1699
|
+
* @returns {boolean} True if the secret type is applicable
|
|
1700
|
+
* @private
|
|
1701
|
+
*/
|
|
1702
|
+
_secretStagesApply(classification, answers) {
|
|
1703
|
+
const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
|
|
1704
|
+
const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
|
|
1705
|
+
|
|
1706
|
+
if (classification.identifier === 'hf-token') {
|
|
1707
|
+
// HF token applies to transformers, diffusors, and Triton LLM backends
|
|
1708
|
+
const isTransformers = architecture === 'transformers';
|
|
1709
|
+
const isDiffusors = architecture === 'diffusors';
|
|
1710
|
+
const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
|
|
1711
|
+
|
|
1712
|
+
if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
|
|
1713
|
+
|
|
1714
|
+
// Skip for non-HuggingFace model sources
|
|
1715
|
+
const modelSource = answers.modelSource;
|
|
1716
|
+
if (modelSource && modelSource !== 'huggingface') return false;
|
|
1717
|
+
|
|
1718
|
+
return true;
|
|
1719
|
+
}
|
|
1720
|
+
|
|
1721
|
+
if (classification.identifier === 'ngc-token') {
|
|
1722
|
+
// NGC token only applies to transformers-tensorrt-llm (build-time only)
|
|
1723
|
+
if (architecture === 'triton') return false;
|
|
1724
|
+
if (architecture === 'diffusors') return false;
|
|
1725
|
+
return architecture === 'transformers' && backend === 'tensorrt-llm';
|
|
1726
|
+
}
|
|
1727
|
+
|
|
1728
|
+
// For future secret types, check if any stage applies
|
|
1729
|
+
// Build-time applies to all Docker-based deployments
|
|
1730
|
+
// Runtime applies to architectures that download at startup
|
|
1731
|
+
return classification.stages.length > 0;
|
|
1732
|
+
}
|
|
1733
|
+
|
|
1734
|
+
/**
|
|
1735
|
+
* Get the ARN config key for a classification.
|
|
1736
|
+
* Maps classification identifiers to config field names.
|
|
1737
|
+
* @param {object} classification - Secret classification entry
|
|
1738
|
+
* @returns {string} Config key for the ARN value
|
|
1739
|
+
* @private
|
|
1740
|
+
*/
|
|
1741
|
+
_getArnConfigKey(classification) {
|
|
1742
|
+
const keyMap = {
|
|
1743
|
+
'hf-token': 'hfTokenArn',
|
|
1744
|
+
'ngc-token': 'ngcTokenArn'
|
|
1745
|
+
};
|
|
1746
|
+
return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
|
|
1747
|
+
}
|
|
1748
|
+
|
|
1749
|
+
/**
|
|
1750
|
+
* Get the plaintext config key for a classification.
|
|
1751
|
+
* Maps classification identifiers to config field names.
|
|
1752
|
+
* @param {object} classification - Secret classification entry
|
|
1753
|
+
* @returns {string} Config key for the plaintext value
|
|
1754
|
+
* @private
|
|
1755
|
+
*/
|
|
1756
|
+
_getPlaintextConfigKey(classification) {
|
|
1757
|
+
const keyMap = {
|
|
1758
|
+
'hf-token': 'hfToken',
|
|
1759
|
+
'ngc-token': 'ngcApiKey'
|
|
1760
|
+
};
|
|
1761
|
+
return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
|
|
1762
|
+
}
|
|
1763
|
+
|
|
1764
|
+
/**
|
|
1765
|
+
* List managed secrets of a given type from AWS Secrets Manager.
|
|
1766
|
+
* Uses the active bootstrap profile to query for secrets tagged with
|
|
1767
|
+
* the mlcc:secret-type matching the given identifier.
|
|
1768
|
+
* @param {string} secretType - The secret type identifier (e.g., 'hf-token')
|
|
1769
|
+
* @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
|
|
1770
|
+
* @private
|
|
1771
|
+
*/
|
|
1772
|
+
async _listManagedSecrets(secretType) {
|
|
1773
|
+
try {
|
|
1774
|
+
const bootstrapConfig = new BootstrapConfig();
|
|
1775
|
+
const activeProfile = bootstrapConfig.getActiveProfile();
|
|
1776
|
+
if (!activeProfile) return [];
|
|
1777
|
+
|
|
1778
|
+
const profile = activeProfile.config.awsProfile;
|
|
1779
|
+
const region = activeProfile.config.awsRegion;
|
|
1780
|
+
if (!profile || !region) return [];
|
|
1781
|
+
|
|
1782
|
+
const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
|
|
1783
|
+
const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
|
|
1784
|
+
const trimmed = output.trim();
|
|
1785
|
+
if (!trimmed) return [];
|
|
1786
|
+
|
|
1787
|
+
const result = JSON.parse(trimmed);
|
|
1788
|
+
const secrets = result.SecretList || [];
|
|
1789
|
+
|
|
1790
|
+
// Filter by secret type tag
|
|
1791
|
+
return secrets
|
|
1792
|
+
.filter(secret => {
|
|
1793
|
+
const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
|
|
1794
|
+
return typeTag && typeTag.Value === secretType;
|
|
1795
|
+
})
|
|
1796
|
+
.map(secret => ({
|
|
1797
|
+
name: secret.Name,
|
|
1798
|
+
arn: secret.ARN
|
|
1799
|
+
}));
|
|
1800
|
+
} catch {
|
|
1801
|
+
// If AWS CLI fails (not configured, no credentials, etc.), return empty
|
|
1802
|
+
return [];
|
|
1803
|
+
}
|
|
1804
|
+
}
|
|
1805
|
+
|
|
1806
|
+
/**
|
|
1807
|
+
* Display a selection list for managed secrets of a given type.
|
|
1808
|
+
* Shows available secrets plus options for plaintext entry and skip.
|
|
1809
|
+
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
|
|
1810
|
+
* @param {object} classification - Secret classification entry
|
|
1811
|
+
* @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
|
|
1812
|
+
* @param {object} previousAnswers - Answers from previous phases
|
|
1813
|
+
* @returns {Promise<object>} Object with the selected value keyed by config field name
|
|
1814
|
+
* @private
|
|
1815
|
+
*/
|
|
1816
|
+
async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
|
|
1817
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1818
|
+
|
|
1819
|
+
console.log(`\n🔐 ${classification.displayName}`);
|
|
1820
|
+
console.log(` ${classification.purpose}`);
|
|
1821
|
+
|
|
1822
|
+
// Build choices: managed secrets + enter plaintext + skip
|
|
1823
|
+
const choices = [
|
|
1824
|
+
...managedSecrets.map(secret => ({
|
|
1825
|
+
name: `🔒 ${secret.name} (${secret.arn})`,
|
|
1826
|
+
value: secret.arn,
|
|
1827
|
+
short: secret.name
|
|
1828
|
+
})),
|
|
1829
|
+
{ name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
|
|
1830
|
+
{ name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
|
|
1831
|
+
];
|
|
1832
|
+
|
|
1833
|
+
const { secretSelection } = await this._runPrompts([{
|
|
1834
|
+
type: 'list',
|
|
1835
|
+
name: 'secretSelection',
|
|
1836
|
+
message: `Select ${classification.promptLabel}:`,
|
|
1837
|
+
choices
|
|
1838
|
+
}]);
|
|
1839
|
+
|
|
1840
|
+
if (secretSelection === '__skip__') {
|
|
1841
|
+
return {};
|
|
1842
|
+
}
|
|
1843
|
+
|
|
1844
|
+
if (secretSelection === '__plaintext__') {
|
|
1845
|
+
// Use existing plaintext flow
|
|
1846
|
+
return this._promptPlaintextEntry(classification, previousAnswers);
|
|
1847
|
+
}
|
|
1848
|
+
|
|
1849
|
+
// User selected a managed secret ARN
|
|
1850
|
+
return { [arnConfigKey]: secretSelection };
|
|
1851
|
+
}
|
|
1852
|
+
|
|
1853
|
+
/**
|
|
1854
|
+
* Prompt for plaintext token entry with ARN detection.
|
|
1855
|
+
* If the user enters an ARN, store it as an ARN reference.
|
|
1856
|
+
* Requirements: 8.4, 8.5, 8.6
|
|
1857
|
+
* @param {object} classification - Secret classification entry
|
|
1858
|
+
* @param {object} previousAnswers - Answers from previous phases
|
|
1859
|
+
* @returns {Promise<object>} Object with the value keyed by config field name
|
|
1860
|
+
* @private
|
|
1861
|
+
*/
|
|
1862
|
+
async _promptPlaintextEntry(classification, _previousAnswers) {
|
|
1863
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1864
|
+
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
1865
|
+
|
|
1866
|
+
const { tokenValue } = await this._runPrompts([{
|
|
1867
|
+
type: 'input',
|
|
1868
|
+
name: 'tokenValue',
|
|
1869
|
+
message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
|
|
1870
|
+
validate: (input) => {
|
|
1871
|
+
// Empty is valid
|
|
1872
|
+
if (!input || input.trim() === '') return true;
|
|
1873
|
+
// Environment variable reference is valid
|
|
1874
|
+
if (input.trim().startsWith('$')) return true;
|
|
1875
|
+
return true;
|
|
1876
|
+
}
|
|
1877
|
+
}]);
|
|
1878
|
+
|
|
1879
|
+
if (!tokenValue || tokenValue.trim() === '') {
|
|
1880
|
+
return {};
|
|
1881
|
+
}
|
|
1882
|
+
|
|
1883
|
+
const value = tokenValue.trim();
|
|
1884
|
+
|
|
1885
|
+
// ARN detection: if the value is a Secrets Manager ARN, store as ARN
|
|
1886
|
+
if (isSecretsManagerArn(value)) {
|
|
1887
|
+
return { [arnConfigKey]: value };
|
|
1888
|
+
}
|
|
1889
|
+
|
|
1890
|
+
// Otherwise store as plaintext
|
|
1891
|
+
return { [plaintextConfigKey]: value };
|
|
1892
|
+
}
|
|
1893
|
+
|
|
1894
|
+
/**
|
|
1895
|
+
* Fall back to existing plaintext prompt when no managed secrets exist.
|
|
1896
|
+
* Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
|
|
1897
|
+
* but with ARN detection on the input.
|
|
1898
|
+
* Requirements: 8.7
|
|
1899
|
+
* @param {object} classification - Secret classification entry
|
|
1900
|
+
* @param {object} previousAnswers - Answers from previous phases
|
|
1901
|
+
* @param {object} explicitConfig - Explicit CLI/config values
|
|
1902
|
+
* @param {object} existingConfig - Existing project configuration
|
|
1903
|
+
* @returns {Promise<object>} Object with the value keyed by config field name
|
|
1904
|
+
* @private
|
|
1905
|
+
*/
|
|
1906
|
+
async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
|
|
1907
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1908
|
+
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
1909
|
+
|
|
1910
|
+
// If in auto-prompt mode, skip
|
|
1911
|
+
if (this.configManager?.isAutoPrompt()) {
|
|
1912
|
+
return {};
|
|
1913
|
+
}
|
|
1914
|
+
|
|
1915
|
+
// Display context-appropriate security message
|
|
1916
|
+
if (classification.identifier === 'hf-token') {
|
|
1917
|
+
console.log('\n🔐 HuggingFace Authentication');
|
|
1918
|
+
console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
|
|
1919
|
+
console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
|
|
1920
|
+
console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
|
|
1921
|
+
console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
|
|
1922
|
+
} else if (classification.identifier === 'ngc-token') {
|
|
1923
|
+
console.log('\n🔐 NVIDIA NGC Authentication');
|
|
1924
|
+
console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
|
|
1925
|
+
console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
|
|
1926
|
+
console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
|
|
1927
|
+
console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
|
|
1928
|
+
} else {
|
|
1929
|
+
console.log(`\n🔐 ${classification.displayName}`);
|
|
1930
|
+
console.log(` ${classification.purpose}\n`);
|
|
1931
|
+
}
|
|
1932
|
+
|
|
1933
|
+
const { tokenValue } = await this._runPrompts([{
|
|
1934
|
+
type: 'input',
|
|
1935
|
+
name: 'tokenValue',
|
|
1936
|
+
message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
|
|
1937
|
+
validate: (input) => {
|
|
1938
|
+
if (!input || input.trim() === '') return true;
|
|
1939
|
+
if (input.trim().startsWith('$')) return true;
|
|
1940
|
+
// Warn about HF token format
|
|
1941
|
+
if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
|
|
1942
|
+
console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
|
|
1943
|
+
console.warn(' If this is intentional, you can ignore this warning.');
|
|
1944
|
+
}
|
|
1945
|
+
return true;
|
|
1946
|
+
}
|
|
1947
|
+
}]);
|
|
1948
|
+
|
|
1949
|
+
if (!tokenValue || tokenValue.trim() === '') {
|
|
1950
|
+
return {};
|
|
1951
|
+
}
|
|
1952
|
+
|
|
1953
|
+
const value = tokenValue.trim();
|
|
1954
|
+
|
|
1955
|
+
// ARN detection: if the value is a Secrets Manager ARN, store as ARN
|
|
1956
|
+
if (isSecretsManagerArn(value)) {
|
|
1957
|
+
return { [arnConfigKey]: value };
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
// Otherwise store as plaintext
|
|
1961
|
+
return { [plaintextConfigKey]: value };
|
|
1962
|
+
}
|
|
1963
|
+
|
|
1279
1964
|
/**
|
|
1280
1965
|
* CUDA-to-AMI mapping.
|
|
1281
1966
|
* Maps CUDA major.minor versions to the SageMaker inference AMI that provides
|
|
@@ -1283,13 +1968,13 @@ export default class PromptRunner {
|
|
|
1283
1968
|
* @private
|
|
1284
1969
|
*/
|
|
1285
1970
|
static CUDA_AMI_MAP = {
|
|
1286
|
-
'11.0': 'al2-ami-sagemaker-inference-gpu-2
|
|
1971
|
+
'11.0': 'al2-ami-sagemaker-inference-gpu-2',
|
|
1287
1972
|
'11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
|
|
1288
|
-
'11.8': 'al2-ami-sagemaker-inference-gpu-
|
|
1973
|
+
'11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
|
|
1289
1974
|
'12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
1290
|
-
'12.2': '
|
|
1291
|
-
'12.4': '
|
|
1292
|
-
'12.6': '
|
|
1975
|
+
'12.2': 'al2023-ami-sagemaker-inference-gpu-4-1',
|
|
1976
|
+
'12.4': 'al2023-ami-sagemaker-inference-gpu-4-1',
|
|
1977
|
+
'12.6': 'al2023-ami-sagemaker-inference-gpu-4-1'
|
|
1293
1978
|
};
|
|
1294
1979
|
|
|
1295
1980
|
/**
|
|
@@ -1297,16 +1982,21 @@ export default class PromptRunner {
|
|
|
1297
1982
|
* supports multiple versions. The choice transparently resolves to the
|
|
1298
1983
|
* correct SageMaker inference AMI.
|
|
1299
1984
|
*
|
|
1985
|
+
* When a base image CUDA version is provided, auto-resolves by intersecting
|
|
1986
|
+
* with the instance's supported versions. Removes the CUDA prompt from the
|
|
1987
|
+
* interactive flow when auto-resolution succeeds.
|
|
1988
|
+
*
|
|
1300
1989
|
* Skipped for CPU instances, non-CUDA accelerators, or when only one
|
|
1301
1990
|
* compatible CUDA version exists.
|
|
1302
1991
|
*
|
|
1303
1992
|
* @param {string} instanceType - Selected instance type (e.g. "ml.g5.2xlarge")
|
|
1304
1993
|
* @param {string} framework - Selected framework name
|
|
1305
1994
|
* @param {string} frameworkVersion - Selected framework version
|
|
1995
|
+
* @param {string} [baseImageCuda] - CUDA version from selected base image (for auto-resolution)
|
|
1306
1996
|
* @returns {Promise<{cudaVersion: string, inferenceAmiVersion: string}|null>}
|
|
1307
1997
|
* @private
|
|
1308
1998
|
*/
|
|
1309
|
-
async _promptCudaVersion(instanceType, framework, frameworkVersion) {
|
|
1999
|
+
async _promptCudaVersion(instanceType, framework, frameworkVersion, baseImageCuda) {
|
|
1310
2000
|
if (!instanceType) return null;
|
|
1311
2001
|
|
|
1312
2002
|
// Look up instance in accelerator mapping
|
|
@@ -1316,6 +2006,33 @@ export default class PromptRunner {
|
|
|
1316
2006
|
const instanceCudaVersions = instanceInfo.accelerator.versions;
|
|
1317
2007
|
if (!instanceCudaVersions || instanceCudaVersions.length === 0) return null;
|
|
1318
2008
|
|
|
2009
|
+
// Auto-resolution: when base image specifies a CUDA version, intersect with instance support
|
|
2010
|
+
// Requirements: 3.11, 4.9, 4.10, 4.11
|
|
2011
|
+
if (baseImageCuda) {
|
|
2012
|
+
const majorRequired = baseImageCuda.split('.')[0];
|
|
2013
|
+
const intersection = instanceCudaVersions.filter(v => {
|
|
2014
|
+
if (v === baseImageCuda) return true;
|
|
2015
|
+
if (v.startsWith(`${majorRequired }.`)) return true;
|
|
2016
|
+
return false;
|
|
2017
|
+
});
|
|
2018
|
+
|
|
2019
|
+
if (intersection.length > 0) {
|
|
2020
|
+
// Auto-select: pick exact match or highest compatible
|
|
2021
|
+
const exactMatch = intersection.find(v => v === baseImageCuda);
|
|
2022
|
+
const selectedVersion = exactMatch || intersection.sort().pop();
|
|
2023
|
+
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[selectedVersion];
|
|
2024
|
+
if (inferenceAmiVersion) {
|
|
2025
|
+
console.log(`\n🔧 CUDA ${selectedVersion} auto-resolved from base image (requires ${baseImageCuda})`);
|
|
2026
|
+
console.log(` AMI: ${inferenceAmiVersion}`);
|
|
2027
|
+
return { cudaVersion: selectedVersion, inferenceAmiVersion };
|
|
2028
|
+
}
|
|
2029
|
+
} else {
|
|
2030
|
+
// No intersection — warn and fall through to manual prompt
|
|
2031
|
+
console.log(`\n ⚠️ Base image requires CUDA ${baseImageCuda} but instance ${instanceType} supports: ${instanceCudaVersions.join(', ')}`);
|
|
2032
|
+
console.log(' No compatible CUDA version found. Falling back to manual selection.');
|
|
2033
|
+
}
|
|
2034
|
+
}
|
|
2035
|
+
|
|
1319
2036
|
// Get framework CUDA requirements (if available)
|
|
1320
2037
|
const registryConfigManager = this.registryConfigManager;
|
|
1321
2038
|
const frameworkConfig = registryConfigManager?.frameworkRegistry?.[framework]?.[frameworkVersion];
|