@aws/ml-container-creator 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +7 -2
- package/package.json +7 -8
- package/servers/base-image-picker/index.js +3 -3
- package/servers/base-image-picker/manifest.json +4 -2
- package/servers/instance-sizer/index.js +561 -0
- package/servers/instance-sizer/lib/instance-ranker.js +245 -0
- package/servers/instance-sizer/lib/model-resolver.js +265 -0
- package/servers/instance-sizer/lib/vram-estimator.js +177 -0
- package/servers/instance-sizer/manifest.json +17 -0
- package/servers/instance-sizer/package.json +15 -0
- package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
- package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
- package/servers/lib/catalogs/model-sizes.json +131 -0
- package/servers/lib/catalogs/models.json +602 -0
- package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
- package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
- package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
- package/servers/lib/schemas/image-catalog.schema.json +0 -12
- package/servers/lib/schemas/instances.schema.json +29 -0
- package/servers/lib/schemas/model-catalog.schema.json +12 -10
- package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
- package/servers/model-picker/index.js +2 -3
- package/servers/model-picker/manifest.json +2 -3
- package/servers/region-picker/index.js +1 -1
- package/servers/region-picker/manifest.json +1 -1
- package/src/app.js +17 -0
- package/src/lib/bootstrap-command-handler.js +38 -0
- package/src/lib/cli-handler.js +3 -3
- package/src/lib/config-manager.js +4 -1
- package/src/lib/configuration-manager.js +2 -2
- package/src/lib/cross-cutting-checker.js +341 -0
- package/src/lib/dry-run-validator.js +78 -0
- package/src/lib/generation-validator.js +102 -0
- package/src/lib/mcp-validator-config.js +89 -0
- package/src/lib/payload-builder.js +153 -0
- package/src/lib/prompt-runner.js +445 -135
- package/src/lib/prompts.js +1 -1
- package/src/lib/registry-loader.js +5 -5
- package/src/lib/schema-sync.js +203 -0
- package/src/lib/schema-validation-engine.js +195 -0
- package/src/lib/service-model-parser.js +102 -0
- package/src/lib/validate-runner.js +167 -0
- package/src/lib/validation-report.js +133 -0
- package/src/lib/validators/base-validator.js +36 -0
- package/src/lib/validators/catalog-validator.js +177 -0
- package/src/lib/validators/enum-validator.js +120 -0
- package/src/lib/validators/required-field-validator.js +150 -0
- package/src/lib/validators/type-validator.js +313 -0
- package/templates/Dockerfile +1 -1
- package/templates/do/build +15 -5
- package/templates/do/run +5 -1
- package/templates/do/validate +61 -0
- package/servers/instance-recommender/LICENSE +0 -202
- package/servers/instance-recommender/index.js +0 -284
- package/servers/instance-recommender/manifest.json +0 -16
- package/servers/instance-recommender/package.json +0 -15
- /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
- /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
package/src/lib/prompt-runner.js
CHANGED
|
@@ -54,6 +54,14 @@ export default class PromptRunner {
|
|
|
54
54
|
|
|
55
55
|
/**
|
|
56
56
|
* Runs all prompting phases and returns combined answers
|
|
57
|
+
*
|
|
58
|
+
* Phase ordering (MCP Catalog Consolidation):
|
|
59
|
+
* Phase 1 (What): deployment config + model name/ID + quantization
|
|
60
|
+
* Phase 2 (How): deployment target + serving profile + base image
|
|
61
|
+
* Phase 3 (Where): region + instance-sizer query + instance type + CUDA/AMI auto-resolution + HyperPod + build target
|
|
62
|
+
* Phase 4 (Details): framework version, model profile, modules
|
|
63
|
+
* Phase 5 (Project): project name + destination
|
|
64
|
+
*
|
|
57
65
|
* @returns {Promise<Object>} Combined answers from all phases
|
|
58
66
|
*/
|
|
59
67
|
async run() {
|
|
@@ -70,39 +78,184 @@ export default class PromptRunner {
|
|
|
70
78
|
// Get only explicit configuration (not defaults) for prompt skipping
|
|
71
79
|
const explicitConfig = this.configManager ? this.configManager.getExplicitConfiguration() : {};
|
|
72
80
|
|
|
73
|
-
//
|
|
74
|
-
//
|
|
75
|
-
//
|
|
81
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
82
|
+
// Phase 1 — What (deployment config + model name/ID + quantization)
|
|
83
|
+
// Requirements: 4.1, 4.2 — model selection drives instance sizing
|
|
84
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
85
|
+
console.log('\n🔧 Core ML Configuration');
|
|
86
|
+
const deploymentConfigAnswers = await this._runPhase(deploymentConfigPrompts, {}, explicitConfig, existingConfig);
|
|
87
|
+
|
|
88
|
+
// Derive architecture, backend, and legacy framework/modelServer from deploymentConfig
|
|
89
|
+
let architecture, backend, framework, modelServer;
|
|
90
|
+
if (deploymentConfigAnswers.deploymentConfig) {
|
|
91
|
+
const parts = deploymentConfigAnswers.deploymentConfig.split('-');
|
|
92
|
+
architecture = parts[0];
|
|
93
|
+
backend = parts.slice(1).join('-');
|
|
94
|
+
// Legacy compatibility: derive framework and modelServer
|
|
95
|
+
framework = architecture;
|
|
96
|
+
modelServer = backend;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// Add derived values to answers
|
|
100
|
+
const frameworkAnswers = {
|
|
101
|
+
...deploymentConfigAnswers,
|
|
102
|
+
architecture: architecture || deploymentConfigAnswers.architecture,
|
|
103
|
+
backend: backend || deploymentConfigAnswers.backend,
|
|
104
|
+
framework: framework || deploymentConfigAnswers.framework,
|
|
105
|
+
modelServer: modelServer || deploymentConfigAnswers.modelServer
|
|
106
|
+
};
|
|
107
|
+
|
|
108
|
+
// Engine prompt for http architecture
|
|
109
|
+
const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
|
|
110
|
+
|
|
111
|
+
// Auto-set model format for Triton backends with single format
|
|
112
|
+
const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
|
|
113
|
+
|
|
114
|
+
// Query model-picker MCP server for model choices
|
|
115
|
+
this._queryMcpForModels(frameworkAnswers.architecture);
|
|
116
|
+
if (this._mcpModelChoices) {
|
|
117
|
+
console.log(' 🔍 Querying model-picker...');
|
|
118
|
+
console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
|
|
119
|
+
}
|
|
120
|
+
const modelFormatPreviousAnswers = {
|
|
121
|
+
...frameworkAnswers,
|
|
122
|
+
...engineAnswers,
|
|
123
|
+
...(this._mcpModelChoices ? { _mcpModelChoices: this._mcpModelChoices } : {})
|
|
124
|
+
};
|
|
125
|
+
const modelFormatAnswers = await this._runPhase(
|
|
126
|
+
modelFormatPrompts,
|
|
127
|
+
modelFormatPreviousAnswers,
|
|
128
|
+
explicitConfig,
|
|
129
|
+
existingConfig
|
|
130
|
+
);
|
|
131
|
+
|
|
132
|
+
// Model server prompts are now deprecated (empty array)
|
|
133
|
+
const modelServerAnswers = await this._runPhase(
|
|
134
|
+
modelServerPrompts,
|
|
135
|
+
{...frameworkAnswers, ...engineAnswers},
|
|
136
|
+
explicitConfig,
|
|
137
|
+
existingConfig
|
|
138
|
+
);
|
|
139
|
+
|
|
140
|
+
// Resolve model ID early for instance-sizer query in Phase 3
|
|
141
|
+
const phase1ModelId = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
|
|
142
|
+
|
|
143
|
+
// Fetch model information from HuggingFace and Model Registry
|
|
144
|
+
if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
|
|
145
|
+
await this._fetchAndDisplayModelInfo(phase1ModelId);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
149
|
+
// Phase 2 — How (deployment target + serving profile + base image)
|
|
150
|
+
// Requirements: 4.3 — instance prompt appears AFTER base image is known
|
|
151
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
76
152
|
console.log('\n💪 Infrastructure & Deployment');
|
|
77
153
|
|
|
78
|
-
//
|
|
79
|
-
await this._queryMcpForRegion({}, explicitConfig);
|
|
154
|
+
// 2a. Deployment target (realtime, async, batch, hyperpod, local)
|
|
80
155
|
const bootstrapRegion = existingConfig.awsRegion || explicitConfig.awsRegion;
|
|
81
156
|
const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
|
|
82
|
-
const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, regionPreviousAnswers, explicitConfig, existingConfig);
|
|
157
|
+
const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
|
|
158
|
+
|
|
159
|
+
// 2b. Query base-image-picker MCP server for base image choices
|
|
160
|
+
await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
|
|
161
|
+
const baseImagePreviousAnswers = {
|
|
162
|
+
...frameworkAnswers,
|
|
163
|
+
...engineAnswers,
|
|
164
|
+
...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
|
|
165
|
+
};
|
|
166
|
+
const baseImageAnswers = await this._runPhase(
|
|
167
|
+
baseImagePrompts,
|
|
168
|
+
baseImagePreviousAnswers,
|
|
169
|
+
explicitConfig,
|
|
170
|
+
existingConfig
|
|
171
|
+
);
|
|
83
172
|
|
|
84
|
-
//
|
|
173
|
+
// Extract CUDA version from selected base image for instance-sizer context
|
|
174
|
+
const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
|
|
175
|
+
|
|
176
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
177
|
+
// Phase 3 — Where (region + instance [derived] + CUDA/AMI + HyperPod + build target)
|
|
178
|
+
// Requirements: 4.4, 4.5, 4.7, 3.6, 3.7 — sizer query with full context
|
|
179
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
180
|
+
|
|
181
|
+
// 3a. Region query
|
|
182
|
+
await this._queryMcpForRegion(frameworkAnswers, explicitConfig);
|
|
183
|
+
|
|
184
|
+
// 3b. Instance type — query instance-sizer with full context (model + profile + CUDA)
|
|
85
185
|
let instanceAnswers = {};
|
|
86
|
-
|
|
186
|
+
const needsInstance = regionAndTargetAnswers.deploymentTarget === 'realtime-inference' ||
|
|
87
187
|
regionAndTargetAnswers.deploymentTarget === 'async-inference' ||
|
|
88
188
|
regionAndTargetAnswers.deploymentTarget === 'batch-transform' ||
|
|
89
|
-
regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks'
|
|
90
|
-
|
|
91
|
-
|
|
189
|
+
regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks';
|
|
190
|
+
|
|
191
|
+
if (needsInstance) {
|
|
192
|
+
// Determine architecture type for heuristic fallback
|
|
193
|
+
const modelArchitecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
194
|
+
|
|
195
|
+
// Skip sizer query if --instance-type was provided via CLI
|
|
196
|
+
if (!explicitConfig.instanceType) {
|
|
197
|
+
// Skip sizer for predictor models (CPU-only)
|
|
198
|
+
if (modelArchitecture === 'predictor' || modelArchitecture === 'http') {
|
|
199
|
+
// Architecture heuristic: predictor → ml.m5.large
|
|
200
|
+
console.log(' ℹ️ Predictor model: defaulting to CPU instance (ml.m5.large)');
|
|
201
|
+
this._architectureHeuristicDefault = 'ml.m5.large';
|
|
202
|
+
} else if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
|
|
203
|
+
// Query instance-sizer with full context
|
|
204
|
+
await this._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
|
|
205
|
+
cudaVersion: selectedBaseImageCuda,
|
|
206
|
+
profileEnvVars: this._selectedProfileEnvVars || {}
|
|
207
|
+
});
|
|
208
|
+
} else {
|
|
209
|
+
// No model known — use architecture heuristic
|
|
210
|
+
await this._queryMcpForInstance(frameworkAnswers, explicitConfig);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
// Build instance prompt choices from sizer results
|
|
215
|
+
const mcpInstanceChoices = this._mcpInstanceSizerChoices || this.configManager?.mcpChoices?.instanceType;
|
|
92
216
|
const instancePreviousAnswers = {
|
|
93
217
|
...regionAndTargetAnswers,
|
|
94
|
-
...(mcpInstanceChoices && mcpInstanceChoices.length > 0 ? { _mcpInstanceChoices: mcpInstanceChoices } : {})
|
|
218
|
+
...(mcpInstanceChoices && mcpInstanceChoices.length > 0 ? { _mcpInstanceChoices: mcpInstanceChoices } : {}),
|
|
219
|
+
...(this._architectureHeuristicDefault ? { _architectureHeuristicDefault: this._architectureHeuristicDefault } : {})
|
|
95
220
|
};
|
|
96
221
|
instanceAnswers = await this._runPhase(infraInstancePrompts, instancePreviousAnswers, explicitConfig, existingConfig);
|
|
222
|
+
|
|
223
|
+
// Apply architecture heuristic fallback when sizer returns empty
|
|
224
|
+
if (!instanceAnswers.instanceType && !explicitConfig.instanceType && this._architectureHeuristicDefault) {
|
|
225
|
+
instanceAnswers.instanceType = this._architectureHeuristicDefault;
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// In auto-prompt mode, use instance-sizer's top recommendation as the instance type
|
|
230
|
+
if (this.configManager?.isAutoPrompt() && this._mcpInstanceSizerChoices && this._mcpInstanceSizerChoices.length > 0) {
|
|
231
|
+
const sizerRecommendation = this._mcpInstanceSizerChoices[0];
|
|
232
|
+
if (!explicitConfig.instanceType) {
|
|
233
|
+
instanceAnswers.instanceType = sizerRecommendation;
|
|
234
|
+
console.log(` ✓ Auto-prompt: using instance-sizer recommendation: ${sizerRecommendation}`);
|
|
235
|
+
}
|
|
97
236
|
}
|
|
98
237
|
|
|
99
|
-
//
|
|
238
|
+
// Auto-set tensor parallelism when sizer recommends TP > 1
|
|
239
|
+
// Requirements: 4.8
|
|
240
|
+
if (this._instanceSizerMetadata) {
|
|
241
|
+
const sizerRecs = this._instanceSizerMetadata.recommendations || [];
|
|
242
|
+
const finalInstanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
|
|
243
|
+
const matchingRec = sizerRecs.find(r => r.instanceType === finalInstanceType);
|
|
244
|
+
const tpRec = matchingRec || sizerRecs[0];
|
|
245
|
+
if (tpRec && tpRec.tensorParallelism > 1) {
|
|
246
|
+
this._autoTensorParallelism = tpRec.tensorParallelism;
|
|
247
|
+
this._autoGpuCount = tpRec.gpuCount;
|
|
248
|
+
console.log(` ✓ Auto-set tensor parallelism: TP=${tpRec.tensorParallelism} (${tpRec.gpuCount} GPUs)`);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
|
|
100
253
|
let asyncAnswers = {};
|
|
101
254
|
if (regionAndTargetAnswers.deploymentTarget === 'async-inference') {
|
|
102
255
|
asyncAnswers = await this._runPhase(infraAsyncPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
|
|
103
256
|
}
|
|
104
257
|
|
|
105
|
-
//
|
|
258
|
+
// 3d. Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
|
|
106
259
|
let batchTransformAnswers = {};
|
|
107
260
|
if (regionAndTargetAnswers.deploymentTarget === 'batch-transform') {
|
|
108
261
|
batchTransformAnswers = await this._runPhase(
|
|
@@ -113,16 +266,24 @@ export default class PromptRunner {
|
|
|
113
266
|
);
|
|
114
267
|
}
|
|
115
268
|
|
|
116
|
-
//
|
|
269
|
+
// 3e. CUDA/AMI auto-resolution
|
|
270
|
+
const instanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
|
|
271
|
+
const cudaAnswer = await this._promptCudaVersion(
|
|
272
|
+
instanceType,
|
|
273
|
+
frameworkAnswers.framework,
|
|
274
|
+
null, // frameworkVersion not yet known in Phase 3
|
|
275
|
+
selectedBaseImageCuda // base image CUDA version for intersection
|
|
276
|
+
);
|
|
277
|
+
|
|
278
|
+
// 3f. HyperPod prompts — only query MCP and prompt when deployment target is hyperpod-eks
|
|
117
279
|
let hyperPodAnswers = {};
|
|
118
280
|
if (regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
|
|
119
|
-
// Resolve the actual region (handle 'custom' selection)
|
|
120
281
|
const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
|
|
121
282
|
await this._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
|
|
122
283
|
hyperPodAnswers = await this._runPhase(infraHyperPodPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
|
|
123
284
|
}
|
|
124
285
|
|
|
125
|
-
//
|
|
286
|
+
// 3g. Build target + role ARN (always)
|
|
126
287
|
const buildAnswers = await this._runPhase(infraBuildPrompts, { ...regionAndTargetAnswers, ...instanceAnswers, ...hyperPodAnswers }, explicitConfig, existingConfig);
|
|
127
288
|
|
|
128
289
|
// Combine all infrastructure answers
|
|
@@ -135,54 +296,16 @@ export default class PromptRunner {
|
|
|
135
296
|
...buildAnswers
|
|
136
297
|
};
|
|
137
298
|
|
|
138
|
-
//
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
// Derive architecture, backend, and legacy framework/modelServer from deploymentConfig
|
|
144
|
-
// Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7
|
|
145
|
-
let architecture, backend, framework, modelServer;
|
|
146
|
-
if (deploymentConfigAnswers.deploymentConfig) {
|
|
147
|
-
const parts = deploymentConfigAnswers.deploymentConfig.split('-');
|
|
148
|
-
architecture = parts[0];
|
|
149
|
-
backend = parts.slice(1).join('-');
|
|
150
|
-
// Legacy compatibility: derive framework and modelServer
|
|
151
|
-
framework = architecture;
|
|
152
|
-
modelServer = backend;
|
|
299
|
+
// Apply CUDA resolution to infra answers
|
|
300
|
+
if (cudaAnswer) {
|
|
301
|
+
infraAnswers._selectedCudaVersion = cudaAnswer.cudaVersion;
|
|
302
|
+
infraAnswers._resolvedInferenceAmiVersion = cudaAnswer.inferenceAmiVersion;
|
|
153
303
|
}
|
|
154
|
-
|
|
155
|
-
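// Add derived values to answers
|
|
156
|
-
const frameworkAnswers = {
|
|
157
|
-
...deploymentConfigAnswers,
|
|
158
|
-
architecture: architecture || deploymentConfigAnswers.architecture,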
|
|
159
|
-
backend: backend || deploymentConfigAnswers.backend,
|
|
160
|
-
framework: framework || deploymentConfigAnswers.framework,
|
|
161
|
-
modelServer: modelServer || deploymentConfigAnswers.modelServer
|
|
162
|
-
};
|
|
163
|
-
|
|
164
|
-
// Engine prompt for http architecture
|
|
165
|
-
// Requirements: 3.7
|
|
166
|
-
const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
|
|
167
|
-
|
|
168
|
-
// Auto-set model format for Triton backends with single format
|
|
169
|
-
// Requirements: 3.3, 3.4, 3.5
|
|
170
|
-
const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
|
|
171
|
-
|
|
172
|
-
// Query base-image-picker MCP server for base image choices
|
|
173
|
-
// Requirements: 5.1, 5.2, 5.3
|
|
174
|
-
await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
|
|
175
|
-
const baseImagePreviousAnswers = {
|
|
176
|
-
...frameworkAnswers,
|
|
177
|
-
...engineAnswers,
|
|
178
|
-
...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
|
|
179
|
-
};
|
|
180
|
-
const baseImageAnswers = await this._runPhase(
|
|
181
|
-
baseImagePrompts,
|
|
182
|
-
baseImagePreviousAnswers,
|
|
183
|
-
explicitConfig,
|
|
184
|
-
existingConfig
|
|
185
|
-
);
|
|
304
|
+
|
|
305
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
306
|
+
// Phase 4 — Details (framework version, model profile, modules)
|
|
307
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
308
|
+
console.log('\n📦 Module Selection');
|
|
186
309
|
|
|
187
310
|
// Populate framework version choices from registry
|
|
188
311
|
const frameworkVersionChoices = this._getFrameworkVersionChoices(frameworkAnswers.framework);
|
|
@@ -209,44 +332,10 @@ export default class PromptRunner {
|
|
|
209
332
|
explicitConfig,
|
|
210
333
|
existingConfig
|
|
211
334
|
);
|
|
212
|
-
|
|
213
|
-
// Query model-picker MCP server for model choices
|
|
214
|
-
this._queryMcpForModels(frameworkAnswers.architecture);
|
|
215
|
-
if (this._mcpModelChoices) {
|
|
216
|
-
console.log(' 🔍 Querying model-picker...');
|
|
217
|
-
console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
|
|
218
|
-
}
|
|
219
|
-
const modelFormatPreviousAnswers = {
|
|
220
|
-
...frameworkAnswers,
|
|
221
|
-
...engineAnswers,
|
|
222
|
-
...frameworkVersionAnswers,
|
|
223
|
-
...frameworkProfileAnswers,
|
|
224
|
-
...(this._mcpModelChoices ? { _mcpModelChoices: this._mcpModelChoices } : {})
|
|
225
|
-
};
|
|
226
|
-
const modelFormatAnswers = await this._runPhase(
|
|
227
|
-
modelFormatPrompts,
|
|
228
|
-
modelFormatPreviousAnswers,
|
|
229
|
-
explicitConfig,
|
|
230
|
-
existingConfig
|
|
231
|
-
);
|
|
232
|
-
|
|
233
|
-
// Model server prompts are now deprecated (empty array)
|
|
234
|
-
const modelServerAnswers = await this._runPhase(
|
|
235
|
-
modelServerPrompts,
|
|
236
|
-
{...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers},
|
|
237
|
-
explicitConfig,
|
|
238
|
-
existingConfig
|
|
239
|
-
);
|
|
240
|
-
|
|
335
|
+
|
|
241
336
|
// Populate model profile choices from registry (if model ID is available)
|
|
337
|
+
const modelId = phase1ModelId;
|
|
242
338
|
const currentAnswers = {...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers};
|
|
243
|
-
const modelId = currentAnswers.customModelName || currentAnswers.modelName || explicitConfig.modelName;
|
|
244
|
-
|
|
245
|
-
// Fetch model information from HuggingFace and Model Registry
|
|
246
|
-
// Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
|
|
247
|
-
if (modelId && modelId !== 'Custom (enter manually)') {
|
|
248
|
-
await this._fetchAndDisplayModelInfo(modelId);
|
|
249
|
-
}
|
|
250
339
|
|
|
251
340
|
const modelProfileChoices = this._getModelProfileChoices(modelId);
|
|
252
341
|
const modelProfileAnswers = await this._runPhase(
|
|
@@ -257,7 +346,6 @@ export default class PromptRunner {
|
|
|
257
346
|
);
|
|
258
347
|
|
|
259
348
|
// Model loading strategy prompt (build-time vs runtime)
|
|
260
|
-
// Requirements: 13.1, 13.2, 13.3, 13.4, 13.5
|
|
261
349
|
const modelLoadStrategyAnswers = await this._runPhase(
|
|
262
350
|
modelLoadStrategyPrompts,
|
|
263
351
|
{ ...frameworkAnswers, ...engineAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
|
|
@@ -273,32 +361,7 @@ export default class PromptRunner {
|
|
|
273
361
|
{ ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
|
|
274
362
|
explicitConfig, existingConfig);
|
|
275
363
|
|
|
276
|
-
//
|
|
277
|
-
// Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
|
|
278
|
-
const instanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
|
|
279
|
-
if (instanceType && frameworkVersionAnswers.frameworkVersion) {
|
|
280
|
-
await this._validateAndDisplayInstanceType(
|
|
281
|
-
instanceType,
|
|
282
|
-
frameworkAnswers.framework,
|
|
283
|
-
frameworkVersionAnswers.frameworkVersion
|
|
284
|
-
);
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
// CUDA version selection: if the selected instance supports multiple CUDA versions,
|
|
288
|
-
// let the user pick which one. This transparently sets the inference AMI version.
|
|
289
|
-
const cudaAnswer = await this._promptCudaVersion(
|
|
290
|
-
instanceType,
|
|
291
|
-
frameworkAnswers.framework,
|
|
292
|
-
frameworkVersionAnswers.frameworkVersion
|
|
293
|
-
);
|
|
294
|
-
if (cudaAnswer) {
|
|
295
|
-
infraAnswers._selectedCudaVersion = cudaAnswer.cudaVersion;
|
|
296
|
-
infraAnswers._resolvedInferenceAmiVersion = cudaAnswer.inferenceAmiVersion;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
// Phase 3: Module Selection
|
|
300
|
-
// Requirements: 3.3 — module selection after ML configuration
|
|
301
|
-
console.log('\n📦 Module Selection');
|
|
364
|
+
// Module selection
|
|
302
365
|
const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
|
|
303
366
|
|
|
304
367
|
// Ensure transformers, diffusors, and ineligible Triton backends don't get sample model
|
|
@@ -309,8 +372,19 @@ export default class PromptRunner {
|
|
|
309
372
|
moduleAnswers.includeSampleModel = false;
|
|
310
373
|
}
|
|
311
374
|
|
|
312
|
-
//
|
|
313
|
-
|
|
375
|
+
// Validate instance type against framework requirements (now that framework version is known)
|
|
376
|
+
const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
|
|
377
|
+
if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
|
|
378
|
+
await this._validateAndDisplayInstanceType(
|
|
379
|
+
finalInstanceType,
|
|
380
|
+
frameworkAnswers.framework,
|
|
381
|
+
frameworkVersionAnswers.frameworkVersion
|
|
382
|
+
);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
386
|
+
// Phase 5 — Project (project name + destination)
|
|
387
|
+
// ══════════════════════════════════════════════════════════════════════
|
|
314
388
|
console.log('\n📋 Project Configuration');
|
|
315
389
|
const allTechnicalAnswers = {
|
|
316
390
|
...frameworkAnswers,
|
|
@@ -439,6 +513,21 @@ export default class PromptRunner {
|
|
|
439
513
|
delete combinedAnswers.customInstanceType;
|
|
440
514
|
}
|
|
441
515
|
|
|
516
|
+
// Propagate tensor parallelism from instance-sizer to templates
|
|
517
|
+
// Requirements: 4.8 — auto-set TP when sizer recommends > 1
|
|
518
|
+
if (this._autoTensorParallelism) {
|
|
519
|
+
combinedAnswers.tensorParallelSize = this._autoTensorParallelism;
|
|
520
|
+
combinedAnswers.gpuCount = this._autoGpuCount;
|
|
521
|
+
} else if (this._instanceSizerMetadata) {
|
|
522
|
+
const sizerInstanceType = combinedAnswers.instanceType;
|
|
523
|
+
const sizerRecs = this._instanceSizerMetadata.recommendations || [];
|
|
524
|
+
const matchingRec = sizerRecs.find(r => r.instanceType === sizerInstanceType);
|
|
525
|
+
if (matchingRec && matchingRec.tensorParallelism > 1) {
|
|
526
|
+
combinedAnswers.tensorParallelSize = matchingRec.tensorParallelism;
|
|
527
|
+
combinedAnswers.gpuCount = matchingRec.gpuCount;
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
442
531
|
// Handle custom HyperPod cluster name
|
|
443
532
|
if (combinedAnswers.customHyperPodCluster) {
|
|
444
533
|
combinedAnswers.hyperPodCluster = combinedAnswers.customHyperPodCluster;
|
|
@@ -623,6 +712,55 @@ export default class PromptRunner {
|
|
|
623
712
|
return null;
|
|
624
713
|
}
|
|
625
714
|
|
|
715
|
+
/**
|
|
716
|
+
* Extract CUDA version from the selected base image.
|
|
717
|
+
* Looks at the MCP base image metadata for accelerator.version or labels.cuda_version.
|
|
718
|
+
* @param {object} baseImageAnswers - Answers from the base image prompt
|
|
719
|
+
* @returns {string|null} CUDA version string (e.g., "12.1") or null
|
|
720
|
+
* @private
|
|
721
|
+
*/
|
|
722
|
+
_extractCudaFromBaseImage(baseImageAnswers) {
|
|
723
|
+
if (!this._mcpBaseImageChoices) return null;
|
|
724
|
+
|
|
725
|
+
const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
|
|
726
|
+
if (!selectedImage) return null;
|
|
727
|
+
|
|
728
|
+
// Find the matching entry in the MCP choices
|
|
729
|
+
const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
|
|
730
|
+
if (!matchingChoice) return null;
|
|
731
|
+
|
|
732
|
+
// Try to extract CUDA version from the choice metadata
|
|
733
|
+
// The formatImageChoices function stores labels in the choice object
|
|
734
|
+
if (matchingChoice._meta?.labels?.cuda_version) {
|
|
735
|
+
return matchingChoice._meta.labels.cuda_version;
|
|
736
|
+
}
|
|
737
|
+
if (matchingChoice._meta?.accelerator?.version) {
|
|
738
|
+
return matchingChoice._meta.accelerator.version;
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
return null;
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
/**
|
|
745
|
+
* Get architecture-based heuristic default instance type.
|
|
746
|
+
* Used when the instance-sizer cannot produce a recommendation.
|
|
747
|
+
* Requirements: 3.9, 4.6
|
|
748
|
+
* @param {string} architecture - Model architecture type
|
|
749
|
+
* @returns {string} Default instance type
|
|
750
|
+
* @private
|
|
751
|
+
*/
|
|
752
|
+
_getArchitectureHeuristicDefault(architecture) {
|
|
753
|
+
const HEURISTIC_DEFAULTS = {
|
|
754
|
+
'transformers': 'ml.g5.xlarge',
|
|
755
|
+
'transformer': 'ml.g5.xlarge',
|
|
756
|
+
'diffusors': 'ml.g5.2xlarge',
|
|
757
|
+
'diffusor': 'ml.g5.2xlarge',
|
|
758
|
+
'predictor': 'ml.m5.large',
|
|
759
|
+
'http': 'ml.m5.large'
|
|
760
|
+
};
|
|
761
|
+
return Object.hasOwn(HEURISTIC_DEFAULTS, architecture) ? HEURISTIC_DEFAULTS[architecture] : 'ml.g5.xlarge';
|
|
762
|
+
}
|
|
763
|
+
|
|
626
764
|
/**
|
|
627
765
|
* Query MCP region-picker server before infrastructure prompts.
|
|
628
766
|
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
@@ -671,8 +809,8 @@ export default class PromptRunner {
|
|
|
671
809
|
}
|
|
672
810
|
|
|
673
811
|
/**
|
|
674
|
-
* Query MCP instance-
|
|
675
|
-
*
|
|
812
|
+
* Query MCP instance-sizer server with tag-based search after deployment target is known.
|
|
813
|
+
* Used when no model name is available for VRAM-based sizing.
|
|
676
814
|
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
677
815
|
* @private
|
|
678
816
|
*/
|
|
@@ -686,7 +824,7 @@ export default class PromptRunner {
|
|
|
686
824
|
const smart = this.options.smart === true;
|
|
687
825
|
|
|
688
826
|
// Instance type: query if not already provided via CLI/config
|
|
689
|
-
if (!explicitConfig.instanceType && mcpServers.includes('instance-recommender')) {
|
|
827
|
+
if (!explicitConfig.instanceType && mcpServers.includes('instance-sizer')) {
|
|
690
828
|
const { instanceSearch } = await this._runPrompts([{
|
|
691
829
|
type: 'input',
|
|
692
830
|
name: 'instanceSearch',
|
|
@@ -695,8 +833,8 @@ export default class PromptRunner {
|
|
|
695
833
|
}]);
|
|
696
834
|
|
|
697
835
|
if (instanceSearch && instanceSearch.trim()) {
|
|
698
|
-
console.log(` 🔍 Querying instance-
|
|
699
|
-
const result = await cm.queryMcpServer('instance-recommender', {
|
|
836
|
+
console.log(` 🔍 Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
|
|
837
|
+
const result = await cm.queryMcpServer('instance-sizer', {
|
|
700
838
|
...frameworkAnswers,
|
|
701
839
|
instanceSearch: instanceSearch.trim()
|
|
702
840
|
});
|
|
@@ -713,6 +851,146 @@ export default class PromptRunner {
|
|
|
713
851
|
}
|
|
714
852
|
}
|
|
715
853
|
|
|
854
|
+
/**
|
|
855
|
+
* Query the instance-sizer MCP server after model is known.
|
|
856
|
+
* Estimates VRAM requirements and returns filtered, ranked instance recommendations.
|
|
857
|
+
* Stores results in this._mcpInstanceSizerChoices and this._instanceSizerMetadata.
|
|
858
|
+
* Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
|
|
859
|
+
* @param {object} frameworkAnswers - Framework/architecture answers
|
|
860
|
+
* @param {object} modelFormatAnswers - Model format answers (contains modelName)
|
|
861
|
+
* @param {object} explicitConfig - Explicit CLI/config values
|
|
862
|
+
* @param {object} [sizerContext={}] - Additional context for the sizer query
|
|
863
|
+
* @param {string} [sizerContext.cudaVersion] - CUDA version from base image
|
|
864
|
+
* @param {object} [sizerContext.profileEnvVars] - Profile ENV overrides
|
|
865
|
+
* @private
|
|
866
|
+
*/
|
|
867
|
+
async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
|
|
868
|
+
const cm = this.configManager;
|
|
869
|
+
if (!cm) return;
|
|
870
|
+
|
|
871
|
+
const mcpServers = cm.getMcpServerNames();
|
|
872
|
+
if (!mcpServers.includes('instance-sizer')) return;
|
|
873
|
+
|
|
874
|
+
// Resolve model name from answers or explicit config
|
|
875
|
+
const modelName = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
|
|
876
|
+
if (!modelName || modelName === 'Custom (enter manually)') return;
|
|
877
|
+
|
|
878
|
+
const smart = this.options.smart === true;
|
|
879
|
+
const discover = this.options.discover === true;
|
|
880
|
+
|
|
881
|
+
const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
|
|
882
|
+
console.log(` 🔍 Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);
|
|
883
|
+
|
|
884
|
+
try {
|
|
885
|
+
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
886
|
+
if (!fs.existsSync(mcpConfigPath)) return;
|
|
887
|
+
|
|
888
|
+
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
889
|
+
const serverConfig = mcpConfig.mcpServers?.['instance-sizer'];
|
|
890
|
+
if (!serverConfig) return;
|
|
891
|
+
|
|
892
|
+
const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
|
|
893
|
+
const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
|
|
894
|
+
|
|
895
|
+
const serverArgs = [...(serverConfig.args || [])];
|
|
896
|
+
if (discover && !serverArgs.includes('--discover')) {
|
|
897
|
+
serverArgs.push('--discover');
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
const transport = new StdioClientTransport({
|
|
901
|
+
command: serverConfig.command,
|
|
902
|
+
args: serverArgs,
|
|
903
|
+
env: {
|
|
904
|
+
...process.env,
|
|
905
|
+
...(serverConfig.env || {}),
|
|
906
|
+
...(smart ? { BEDROCK_SMART: 'true' } : {})
|
|
907
|
+
},
|
|
908
|
+
stderr: 'pipe'
|
|
909
|
+
});
|
|
910
|
+
|
|
911
|
+
const mcpClient = new Client(
|
|
912
|
+
{ name: 'ml-container-creator', version: '1.0.0' },
|
|
913
|
+
{ capabilities: {} }
|
|
914
|
+
);
|
|
915
|
+
|
|
916
|
+
await mcpClient.connect(transport);
|
|
917
|
+
|
|
918
|
+
const toolArgs = {
|
|
919
|
+
modelName,
|
|
920
|
+
limit: 8,
|
|
921
|
+
context: {
|
|
922
|
+
architecture: frameworkAnswers.architecture || undefined,
|
|
923
|
+
backend: frameworkAnswers.backend || undefined,
|
|
924
|
+
deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
|
|
925
|
+
profileEnvVars: sizerContext.profileEnvVars || undefined
|
|
926
|
+
}
|
|
927
|
+
};
|
|
928
|
+
|
|
929
|
+
// Add CUDA version from base image for filtering
|
|
930
|
+
if (sizerContext.cudaVersion) {
|
|
931
|
+
toolArgs.cudaVersion = sizerContext.cudaVersion;
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
// Add quantization if available from model format answers
|
|
935
|
+
if (modelFormatAnswers.quantization) {
|
|
936
|
+
toolArgs.quantization = modelFormatAnswers.quantization;
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
const result = await mcpClient.callTool({
|
|
940
|
+
name: 'get_instance_recommendation',
|
|
941
|
+
arguments: toolArgs
|
|
942
|
+
});
|
|
943
|
+
|
|
944
|
+
await mcpClient.close();
|
|
945
|
+
|
|
946
|
+
// Parse the response
|
|
947
|
+
const textBlock = result?.content?.find(b => b.type === 'text');
|
|
948
|
+
if (textBlock) {
|
|
949
|
+
const parsed = JSON.parse(textBlock.text);
|
|
950
|
+
|
|
951
|
+
if (parsed.choices?.instanceType?.length > 0) {
|
|
952
|
+
this._instanceSizerMetadata = parsed.metadata || null;
|
|
953
|
+
|
|
954
|
+
// Build display labels with VRAM estimate and utilization percentage
|
|
955
|
+
const recommendations = parsed.metadata?.recommendations || [];
|
|
956
|
+
const estimatedVramGb = parsed.metadata?.estimatedVramGb;
|
|
957
|
+
|
|
958
|
+
// Store choices with display labels for the instance prompt
|
|
959
|
+
this._mcpInstanceSizerChoices = parsed.choices.instanceType;
|
|
960
|
+
this._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
|
|
961
|
+
name: rec.displayLabel || `${rec.instanceType} (${estimatedVramGb ? estimatedVramGb.toFixed(1) : '?'}GB / ${rec.totalVramGb || '?'}GB — ${rec.utilizationPercent || '?'}% utilization)`,
|
|
962
|
+
value: rec.instanceType,
|
|
963
|
+
short: rec.instanceType
|
|
964
|
+
}));
|
|
965
|
+
|
|
966
|
+
const choices = parsed.choices.instanceType;
|
|
967
|
+
const topRec = recommendations[0];
|
|
968
|
+
const vramInfo = estimatedVramGb
|
|
969
|
+
? ` (VRAM: ${estimatedVramGb.toFixed(1)}GB)`
|
|
970
|
+
: '';
|
|
971
|
+
const tpInfo = topRec?.tensorParallelism > 1
|
|
972
|
+
? ` [TP=${topRec.tensorParallelism}]`
|
|
973
|
+
: '';
|
|
974
|
+
|
|
975
|
+
console.log(` ✓ ${choices.length} sized instance(s): ${choices[0]}${vramInfo}${tpInfo}`);
|
|
976
|
+
} else if (parsed.metadata?.warning) {
|
|
977
|
+
console.log(` ⚠️ ${parsed.metadata.warning}`);
|
|
978
|
+
} else {
|
|
979
|
+
// Apply architecture heuristic fallback when sizer returns empty
|
|
980
|
+
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
981
|
+
this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
|
|
982
|
+
console.log(` ↳ No instance-sizer results, using heuristic default: ${this._architectureHeuristicDefault}`);
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
} catch (err) {
|
|
986
|
+
// Sizer unavailable — apply architecture heuristic fallback
|
|
987
|
+
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
988
|
+
this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
|
|
989
|
+
console.log(` ⚠️ instance-sizer: ${err.message}`);
|
|
990
|
+
console.log(` ↳ Using heuristic default: ${this._architectureHeuristicDefault}`);
|
|
991
|
+
}
|
|
992
|
+
}
|
|
993
|
+
|
|
716
994
|
/**
|
|
717
995
|
* Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
|
|
718
996
|
* Populates configManager.mcpChoices.hyperPodCluster so _runPhase injects them into the list prompt.
|
|
@@ -1297,16 +1575,21 @@ export default class PromptRunner {
|
|
|
1297
1575
|
* supports multiple versions. The choice transparently resolves to the
|
|
1298
1576
|
* correct SageMaker inference AMI.
|
|
1299
1577
|
*
|
|
1578
|
+
* When a base image CUDA version is provided, auto-resolves by intersecting
|
|
1579
|
+
* with the instance's supported versions. Removes the CUDA prompt from the
|
|
1580
|
+
* interactive flow when auto-resolution succeeds.
|
|
1581
|
+
*
|
|
1300
1582
|
* Skipped for CPU instances, non-CUDA accelerators, or when only one
|
|
1301
1583
|
* compatible CUDA version exists.
|
|
1302
1584
|
*
|
|
1303
1585
|
* @param {string} instanceType - Selected instance type (e.g. "ml.g5.2xlarge")
|
|
1304
1586
|
* @param {string} framework - Selected framework name
|
|
1305
1587
|
* @param {string} frameworkVersion - Selected framework version
|
|
1588
|
+
* @param {string} [baseImageCuda] - CUDA version from selected base image (for auto-resolution)
|
|
1306
1589
|
* @returns {Promise<{cudaVersion: string, inferenceAmiVersion: string}|null>}
|
|
1307
1590
|
* @private
|
|
1308
1591
|
*/
|
|
1309
|
-
async _promptCudaVersion(instanceType, framework, frameworkVersion) {
|
|
1592
|
+
async _promptCudaVersion(instanceType, framework, frameworkVersion, baseImageCuda) {
|
|
1310
1593
|
if (!instanceType) return null;
|
|
1311
1594
|
|
|
1312
1595
|
// Look up instance in accelerator mapping
|
|
@@ -1316,6 +1599,33 @@ export default class PromptRunner {
|
|
|
1316
1599
|
const instanceCudaVersions = instanceInfo.accelerator.versions;
|
|
1317
1600
|
if (!instanceCudaVersions || instanceCudaVersions.length === 0) return null;
|
|
1318
1601
|
|
|
1602
|
+
// Auto-resolution: when base image specifies a CUDA version, intersect with instance support
|
|
1603
|
+
// Requirements: 3.11, 4.9, 4.10, 4.11
|
|
1604
|
+
if (baseImageCuda) {
|
|
1605
|
+
const majorRequired = baseImageCuda.split('.')[0];
|
|
1606
|
+
const intersection = instanceCudaVersions.filter(v => {
|
|
1607
|
+
if (v === baseImageCuda) return true;
|
|
1608
|
+
if (v.startsWith(`${majorRequired }.`)) return true;
|
|
1609
|
+
return false;
|
|
1610
|
+
});
|
|
1611
|
+
|
|
1612
|
+
if (intersection.length > 0) {
|
|
1613
|
+
// Auto-select: pick exact match or highest compatible
|
|
1614
|
+
const exactMatch = intersection.find(v => v === baseImageCuda);
|
|
1615
|
+
const selectedVersion = exactMatch || intersection.sort().pop();
|
|
1616
|
+
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[selectedVersion];
|
|
1617
|
+
if (inferenceAmiVersion) {
|
|
1618
|
+
console.log(`\n🔧 CUDA ${selectedVersion} auto-resolved from base image (requires ${baseImageCuda})`);
|
|
1619
|
+
console.log(` AMI: ${inferenceAmiVersion}`);
|
|
1620
|
+
return { cudaVersion: selectedVersion, inferenceAmiVersion };
|
|
1621
|
+
}
|
|
1622
|
+
} else {
|
|
1623
|
+
// No intersection — warn and fall through to manual prompt
|
|
1624
|
+
console.log(`\n ⚠️ Base image requires CUDA ${baseImageCuda} but instance ${instanceType} supports: ${instanceCudaVersions.join(', ')}`);
|
|
1625
|
+
console.log(' No compatible CUDA version found. Falling back to manual selection.');
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
|
|
1319
1629
|
// Get framework CUDA requirements (if available)
|
|
1320
1630
|
const registryConfigManager = this.registryConfigManager;
|
|
1321
1631
|
const frameworkConfig = registryConfigManager?.frameworkRegistry?.[framework]?.[frameworkVersion];
|