@aws/ml-container-creator 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/bin/cli.js +45 -4
  2. package/config/bootstrap-stack.json +14 -0
  3. package/infra/ci-harness/package-lock.json +22 -9
  4. package/package.json +7 -8
  5. package/servers/base-image-picker/index.js +3 -3
  6. package/servers/base-image-picker/manifest.json +4 -2
  7. package/servers/instance-sizer/index.js +564 -0
  8. package/servers/instance-sizer/lib/instance-ranker.js +270 -0
  9. package/servers/instance-sizer/lib/model-resolver.js +269 -0
  10. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  11. package/servers/instance-sizer/manifest.json +17 -0
  12. package/servers/instance-sizer/package.json +15 -0
  13. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  14. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
  15. package/servers/lib/catalogs/model-sizes.json +131 -0
  16. package/servers/lib/catalogs/models.json +632 -0
  17. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  18. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  19. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  20. package/servers/lib/schemas/image-catalog.schema.json +6 -12
  21. package/servers/lib/schemas/instances.schema.json +29 -0
  22. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  23. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  24. package/servers/model-picker/index.js +4 -4
  25. package/servers/model-picker/manifest.json +2 -3
  26. package/servers/region-picker/index.js +1 -1
  27. package/servers/region-picker/manifest.json +1 -1
  28. package/src/app.js +36 -0
  29. package/src/lib/architecture-sync.js +171 -0
  30. package/src/lib/arn-detection.js +22 -0
  31. package/src/lib/bootstrap-command-handler.js +120 -0
  32. package/src/lib/cli-handler.js +3 -3
  33. package/src/lib/config-manager.js +47 -1
  34. package/src/lib/configuration-manager.js +2 -2
  35. package/src/lib/cross-cutting-checker.js +460 -0
  36. package/src/lib/deployment-entry-schema.js +1 -2
  37. package/src/lib/dry-run-validator.js +78 -0
  38. package/src/lib/generation-validator.js +102 -0
  39. package/src/lib/mcp-validator-config.js +89 -0
  40. package/src/lib/payload-builder.js +153 -0
  41. package/src/lib/prompt-runner.js +866 -149
  42. package/src/lib/prompts.js +2 -2
  43. package/src/lib/registry-command-handler.js +236 -0
  44. package/src/lib/registry-loader.js +5 -5
  45. package/src/lib/schema-sync.js +203 -0
  46. package/src/lib/schema-validation-engine.js +195 -0
  47. package/src/lib/secret-classification.js +56 -0
  48. package/src/lib/secrets-command-handler.js +550 -0
  49. package/src/lib/service-model-parser.js +102 -0
  50. package/src/lib/validate-runner.js +216 -0
  51. package/src/lib/validation-report.js +140 -0
  52. package/src/lib/validators/base-validator.js +36 -0
  53. package/src/lib/validators/catalog-validator.js +177 -0
  54. package/src/lib/validators/enum-validator.js +120 -0
  55. package/src/lib/validators/required-field-validator.js +150 -0
  56. package/src/lib/validators/type-validator.js +313 -0
  57. package/src/prompt-adapter.js +3 -2
  58. package/templates/Dockerfile +1 -1
  59. package/templates/do/build +37 -5
  60. package/templates/do/config +15 -3
  61. package/templates/do/deploy +60 -5
  62. package/templates/do/logs +18 -3
  63. package/templates/do/run +15 -1
  64. package/templates/do/validate +61 -0
  65. package/servers/instance-recommender/LICENSE +0 -202
  66. package/servers/instance-recommender/index.js +0 -284
  67. package/servers/instance-recommender/manifest.json +0 -16
  68. package/servers/instance-recommender/package.json +0 -15
  69. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  70. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  71. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  72. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -17,8 +17,6 @@ import {
17
17
  modelServerPrompts,
18
18
  modelLoadStrategyPrompts,
19
19
  modelProfilePrompts,
20
- hfTokenPrompts,
21
- ngcApiKeyPrompts,
22
20
  modulePrompts,
23
21
  infraRegionAndTargetPrompts,
24
22
  infraInstancePrompts,
@@ -35,9 +33,13 @@ import {
35
33
 
36
34
  import fs from 'fs';
37
35
  import path from 'path';
36
+ import { execSync } from 'node:child_process';
38
37
  import { fileURLToPath } from 'node:url';
39
38
  import RegistryLoader from './registry-loader.js';
40
39
  import { runPrompts } from '../prompt-adapter.js';
40
+ import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
41
+ import { isSecretsManagerArn } from './arn-detection.js';
42
+ import BootstrapConfig from './bootstrap-config.js';
41
43
 
42
44
  const __pr_filename = fileURLToPath(import.meta.url);
43
45
  const __pr_dirname = path.dirname(__pr_filename);
@@ -54,6 +56,14 @@ export default class PromptRunner {
54
56
 
55
57
  /**
56
58
  * Runs all prompting phases and returns combined answers
59
+ *
60
+ * Phase ordering (MCP Catalog Consolidation):
61
+ * Phase 1 (What): deployment config + model name/ID + quantization
62
+ * Phase 2 (How): deployment target + serving profile + base image
63
+ * Phase 3 (Where): region + instance-sizer query + instance type + CUDA/AMI auto-resolution + HyperPod + build target
64
+ * Phase 4 (Details): framework version, model profile, modules
65
+ * Phase 5 (Project): project name + destination
66
+ *
57
67
  * @returns {Promise<Object>} Combined answers from all phases
58
68
  */
59
69
  async run() {
@@ -70,39 +80,187 @@ export default class PromptRunner {
70
80
  // Get only explicit configuration (not defaults) for prompt skipping
71
81
  const explicitConfig = this.configManager ? this.configManager.getExplicitConfiguration() : {};
72
82
 
73
- // Phase 1: Infrastructure & Deployment
74
- // Requirements: 3.1 — infrastructure prompts run first
75
- // Ordering: Region Deployment Target Instance (if managed) → HyperPod (if eks) → Build Target
83
+ // ══════════════════════════════════════════════════════════════════════
84
+ // Phase 1 — What (deployment config + model name/ID + quantization)
85
+ // Requirements: 4.1, 4.2 model selection drives instance sizing
86
+ // ══════════════════════════════════════════════════════════════════════
87
+ console.log('\n🔧 Core ML Configuration');
88
+ const deploymentConfigAnswers = await this._runPhase(deploymentConfigPrompts, {}, explicitConfig, existingConfig);
89
+
90
+ // Derive architecture, backend, and legacy framework/modelServer from deploymentConfig
91
+ let architecture, backend, framework, modelServer;
92
+ if (deploymentConfigAnswers.deploymentConfig) {
93
+ const parts = deploymentConfigAnswers.deploymentConfig.split('-');
94
+ architecture = parts[0];
95
+ backend = parts.slice(1).join('-');
96
+ // Legacy compatibility: derive framework and modelServer
97
+ framework = architecture;
98
+ modelServer = backend;
99
+ }
100
+
101
+ // Add derived values to answers
102
+ const frameworkAnswers = {
103
+ ...deploymentConfigAnswers,
104
+ architecture: architecture || deploymentConfigAnswers.architecture,
105
+ backend: backend || deploymentConfigAnswers.backend,
106
+ framework: framework || deploymentConfigAnswers.framework,
107
+ modelServer: modelServer || deploymentConfigAnswers.modelServer
108
+ };
109
+
110
+ // Engine prompt for http architecture
111
+ const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
112
+
113
+ // Auto-set model format for Triton backends with single format
114
+ const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
115
+
116
+ // Query model-picker MCP server for model choices
117
+ this._queryMcpForModels(frameworkAnswers.architecture);
118
+ if (this._mcpModelChoices) {
119
+ console.log(' 🔍 Querying model-picker...');
120
+ console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
121
+ }
122
+ const modelFormatPreviousAnswers = {
123
+ ...frameworkAnswers,
124
+ ...engineAnswers,
125
+ ...(this._mcpModelChoices ? { _mcpModelChoices: this._mcpModelChoices } : {})
126
+ };
127
+ const modelFormatAnswers = await this._runPhase(
128
+ modelFormatPrompts,
129
+ modelFormatPreviousAnswers,
130
+ explicitConfig,
131
+ existingConfig
132
+ );
133
+
134
+ // Model server prompts are now deprecated (empty array)
135
+ const modelServerAnswers = await this._runPhase(
136
+ modelServerPrompts,
137
+ {...frameworkAnswers, ...engineAnswers},
138
+ explicitConfig,
139
+ existingConfig
140
+ );
141
+
142
+ // Resolve model ID early for instance-sizer query in Phase 3
143
+ const phase1ModelId = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
144
+
145
+ // Fetch model information from HuggingFace and Model Registry
146
+ if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
147
+ await this._fetchAndDisplayModelInfo(phase1ModelId);
148
+ }
149
+
150
+ // ══════════════════════════════════════════════════════════════════════
151
+ // Phase 2 — How (deployment target + serving profile + base image)
152
+ // Requirements: 4.3 — instance prompt appears AFTER base image is known
153
+ // ══════════════════════════════════════════════════════════════════════
76
154
  console.log('\n💪 Infrastructure & Deployment');
77
155
 
78
- // 1a. Query region MCP, then prompt for region + deployment target
79
- await this._queryMcpForRegion({}, explicitConfig);
156
+ // 2a. Deployment target (realtime, async, batch, hyperpod, local)
80
157
  const bootstrapRegion = existingConfig.awsRegion || explicitConfig.awsRegion;
81
158
  const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
82
- const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, regionPreviousAnswers, explicitConfig, existingConfig);
159
+ const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
160
+
161
+ // 2b. Query base-image-picker MCP server for base image choices
162
+ await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
163
+ const baseImagePreviousAnswers = {
164
+ ...frameworkAnswers,
165
+ ...engineAnswers,
166
+ ...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
167
+ };
168
+ const baseImageAnswers = await this._runPhase(
169
+ baseImagePrompts,
170
+ baseImagePreviousAnswers,
171
+ explicitConfig,
172
+ existingConfig
173
+ );
174
+
175
+ // Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
176
+ this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
83
177
 
84
- // 1b. Instance type query MCP and prompt for realtime-inference, async-inference, batch-transform, and hyperpod-eks
178
+ // Extract CUDA version from selected base image for instance-sizer context
179
+ const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
180
+
181
+ // ══════════════════════════════════════════════════════════════════════
182
+ // Phase 3 — Where (region + instance [derived] + CUDA/AMI + HyperPod + build target)
183
+ // Requirements: 4.4, 4.5, 4.7, 3.6, 3.7 — sizer query with full context
184
+ // ══════════════════════════════════════════════════════════════════════
185
+
186
+ // 3a. Region query
187
+ await this._queryMcpForRegion(frameworkAnswers, explicitConfig);
188
+
189
+ // 3b. Instance type — query instance-sizer with full context (model + profile + CUDA)
85
190
  let instanceAnswers = {};
86
- if (regionAndTargetAnswers.deploymentTarget === 'realtime-inference' ||
191
+ const needsInstance = regionAndTargetAnswers.deploymentTarget === 'realtime-inference' ||
87
192
  regionAndTargetAnswers.deploymentTarget === 'async-inference' ||
88
193
  regionAndTargetAnswers.deploymentTarget === 'batch-transform' ||
89
- regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
90
- await this._queryMcpForInstance({}, explicitConfig);
91
- const mcpInstanceChoices = this.configManager?.mcpChoices?.instanceType;
194
+ regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks';
195
+
196
+ if (needsInstance) {
197
+ // Determine architecture type for heuristic fallback
198
+ const modelArchitecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
199
+
200
+ // Skip sizer query if --instance-type was provided via CLI
201
+ if (!explicitConfig.instanceType) {
202
+ // Skip sizer for predictor models (CPU-only)
203
+ if (modelArchitecture === 'predictor' || modelArchitecture === 'http') {
204
+ // Architecture heuristic: predictor → ml.m5.large
205
+ console.log(' ℹ️ Predictor model: defaulting to CPU instance (ml.m5.large)');
206
+ this._architectureHeuristicDefault = 'ml.m5.large';
207
+ } else if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
208
+ // Query instance-sizer with full context
209
+ await this._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
210
+ cudaVersion: selectedBaseImageCuda,
211
+ profileEnvVars: this._selectedProfileEnvVars || {}
212
+ });
213
+ } else {
214
+ // No model known — use architecture heuristic
215
+ await this._queryMcpForInstance(frameworkAnswers, explicitConfig);
216
+ }
217
+ }
218
+
219
+ // Build instance prompt choices from sizer results
220
+ const mcpInstanceChoices = this._mcpInstanceSizerChoices || this.configManager?.mcpChoices?.instanceType;
92
221
  const instancePreviousAnswers = {
93
222
  ...regionAndTargetAnswers,
94
- ...(mcpInstanceChoices && mcpInstanceChoices.length > 0 ? { _mcpInstanceChoices: mcpInstanceChoices } : {})
223
+ ...(mcpInstanceChoices && mcpInstanceChoices.length > 0 ? { _mcpInstanceChoices: mcpInstanceChoices } : {}),
224
+ ...(this._architectureHeuristicDefault ? { _architectureHeuristicDefault: this._architectureHeuristicDefault } : {})
95
225
  };
96
226
  instanceAnswers = await this._runPhase(infraInstancePrompts, instancePreviousAnswers, explicitConfig, existingConfig);
227
+
228
+ // Apply architecture heuristic fallback when sizer returns empty
229
+ if (!instanceAnswers.instanceType && !explicitConfig.instanceType && this._architectureHeuristicDefault) {
230
+ instanceAnswers.instanceType = this._architectureHeuristicDefault;
231
+ }
232
+ }
233
+
234
+ // In auto-prompt mode, use instance-sizer's top recommendation as the instance type
235
+ if (this.configManager?.isAutoPrompt() && this._mcpInstanceSizerChoices && this._mcpInstanceSizerChoices.length > 0) {
236
+ const sizerRecommendation = this._mcpInstanceSizerChoices[0];
237
+ if (!explicitConfig.instanceType) {
238
+ instanceAnswers.instanceType = sizerRecommendation;
239
+ console.log(` ✓ Auto-prompt: using instance-sizer recommendation: ${sizerRecommendation}`);
240
+ }
97
241
  }
98
242
 
99
- // 1b-async. Async-specific prompts (only when deploymentTarget === 'async-inference')
243
+ // Auto-set tensor parallelism when sizer recommends TP > 1
244
+ // Requirements: 4.8
245
+ if (this._instanceSizerMetadata) {
246
+ const sizerRecs = this._instanceSizerMetadata.recommendations || [];
247
+ const finalInstanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
248
+ const matchingRec = sizerRecs.find(r => r.instanceType === finalInstanceType);
249
+ const tpRec = matchingRec || sizerRecs[0];
250
+ if (tpRec && tpRec.tensorParallelism > 1) {
251
+ this._autoTensorParallelism = tpRec.tensorParallelism;
252
+ this._autoGpuCount = tpRec.gpuCount;
253
+ console.log(` ✓ Auto-set tensor parallelism: TP=${tpRec.tensorParallelism} (${tpRec.gpuCount} GPUs)`);
254
+ }
255
+ }
256
+
257
+ // 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
100
258
  let asyncAnswers = {};
101
259
  if (regionAndTargetAnswers.deploymentTarget === 'async-inference') {
102
260
  asyncAnswers = await this._runPhase(infraAsyncPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
103
261
  }
104
262
 
105
- // 1b-batch. Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
263
+ // 3d. Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
106
264
  let batchTransformAnswers = {};
107
265
  if (regionAndTargetAnswers.deploymentTarget === 'batch-transform') {
108
266
  batchTransformAnswers = await this._runPhase(
@@ -113,16 +271,24 @@ export default class PromptRunner {
113
271
  );
114
272
  }
115
273
 
116
- // 1c. HyperPod prompts — only query MCP and prompt when deployment target is hyperpod-eks
274
+ // 3e. CUDA/AMI auto-resolution
275
+ const instanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
276
+ const cudaAnswer = await this._promptCudaVersion(
277
+ instanceType,
278
+ frameworkAnswers.framework,
279
+ null, // frameworkVersion not yet known in Phase 3
280
+ selectedBaseImageCuda // base image CUDA version for intersection
281
+ );
282
+
283
+ // 3f. HyperPod prompts — only query MCP and prompt when deployment target is hyperpod-eks
117
284
  let hyperPodAnswers = {};
118
285
  if (regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
119
- // Resolve the actual region (handle 'custom' selection)
120
286
  const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
121
287
  await this._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
122
288
  hyperPodAnswers = await this._runPhase(infraHyperPodPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
123
289
  }
124
290
 
125
- // 1d. Build target + role ARN (always)
291
+ // 3g. Build target + role ARN (always)
126
292
  const buildAnswers = await this._runPhase(infraBuildPrompts, { ...regionAndTargetAnswers, ...instanceAnswers, ...hyperPodAnswers }, explicitConfig, existingConfig);
127
293
 
128
294
  // Combine all infrastructure answers
@@ -135,54 +301,16 @@ export default class PromptRunner {
135
301
  ...buildAnswers
136
302
  };
137
303
 
138
- // Phase 2: Core ML Configuration
139
- // Requirements: 3.1, 3.2 — ML configuration prompts run after infrastructure
140
- console.log('\n🔧 Core ML Configuration');
141
- const deploymentConfigAnswers = await this._runPhase(deploymentConfigPrompts, { ...infraAnswers }, explicitConfig, existingConfig);
142
-
143
- // Derive architecture, backend, and legacy framework/modelServer from deploymentConfig
144
- // Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7
145
- let architecture, backend, framework, modelServer;
146
- if (deploymentConfigAnswers.deploymentConfig) {
147
- const parts = deploymentConfigAnswers.deploymentConfig.split('-');
148
- architecture = parts[0];
149
- backend = parts.slice(1).join('-');
150
- // Legacy compatibility: derive framework and modelServer
151
- framework = architecture;
152
- modelServer = backend;
304
+ // Apply CUDA resolution to infra answers
305
+ if (cudaAnswer) {
306
+ infraAnswers._selectedCudaVersion = cudaAnswer.cudaVersion;
307
+ infraAnswers._resolvedInferenceAmiVersion = cudaAnswer.inferenceAmiVersion;
153
308
  }
154
-
155
- // Add derived values to answers
156
- const frameworkAnswers = {
157
- ...deploymentConfigAnswers,
158
- architecture: architecture || deploymentConfigAnswers.architecture,
159
- backend: backend || deploymentConfigAnswers.backend,
160
- framework: framework || deploymentConfigAnswers.framework,
161
- modelServer: modelServer || deploymentConfigAnswers.modelServer
162
- };
163
-
164
- // Engine prompt for http architecture
165
- // Requirements: 3.7
166
- const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
167
-
168
- // Auto-set model format for Triton backends with single format
169
- // Requirements: 3.3, 3.4, 3.5
170
- const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
171
-
172
- // Query base-image-picker MCP server for base image choices
173
- // Requirements: 5.1, 5.2, 5.3
174
- await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
175
- const baseImagePreviousAnswers = {
176
- ...frameworkAnswers,
177
- ...engineAnswers,
178
- ...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
179
- };
180
- const baseImageAnswers = await this._runPhase(
181
- baseImagePrompts,
182
- baseImagePreviousAnswers,
183
- explicitConfig,
184
- existingConfig
185
- );
309
+
310
+ // ══════════════════════════════════════════════════════════════════════
311
+ // Phase 4 — Details (framework version, model profile, modules)
312
+ // ══════════════════════════════════════════════════════════════════════
313
+ console.log('\n📦 Module Selection');
186
314
 
187
315
  // Populate framework version choices from registry
188
316
  const frameworkVersionChoices = this._getFrameworkVersionChoices(frameworkAnswers.framework);
@@ -209,44 +337,10 @@ export default class PromptRunner {
209
337
  explicitConfig,
210
338
  existingConfig
211
339
  );
212
-
213
- // Query model-picker MCP server for model choices
214
- this._queryMcpForModels(frameworkAnswers.architecture);
215
- if (this._mcpModelChoices) {
216
- console.log(' 🔍 Querying model-picker...');
217
- console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
218
- }
219
- const modelFormatPreviousAnswers = {
220
- ...frameworkAnswers,
221
- ...engineAnswers,
222
- ...frameworkVersionAnswers,
223
- ...frameworkProfileAnswers,
224
- ...(this._mcpModelChoices ? { _mcpModelChoices: this._mcpModelChoices } : {})
225
- };
226
- const modelFormatAnswers = await this._runPhase(
227
- modelFormatPrompts,
228
- modelFormatPreviousAnswers,
229
- explicitConfig,
230
- existingConfig
231
- );
232
-
233
- // Model server prompts are now deprecated (empty array)
234
- const modelServerAnswers = await this._runPhase(
235
- modelServerPrompts,
236
- {...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers},
237
- explicitConfig,
238
- existingConfig
239
- );
240
-
340
+
241
341
  // Populate model profile choices from registry (if model ID is available)
342
+ const modelId = phase1ModelId;
242
343
  const currentAnswers = {...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers};
243
- const modelId = currentAnswers.customModelName || currentAnswers.modelName || explicitConfig.modelName;
244
-
245
- // Fetch model information from HuggingFace and Model Registry
246
- // Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
247
- if (modelId && modelId !== 'Custom (enter manually)') {
248
- await this._fetchAndDisplayModelInfo(modelId);
249
- }
250
344
 
251
345
  const modelProfileChoices = this._getModelProfileChoices(modelId);
252
346
  const modelProfileAnswers = await this._runPhase(
@@ -257,7 +351,6 @@ export default class PromptRunner {
257
351
  );
258
352
 
259
353
  // Model loading strategy prompt (build-time vs runtime)
260
- // Requirements: 13.1, 13.2, 13.3, 13.4, 13.5
261
354
  const modelLoadStrategyAnswers = await this._runPhase(
262
355
  modelLoadStrategyPrompts,
263
356
  { ...frameworkAnswers, ...engineAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
@@ -265,40 +358,13 @@ export default class PromptRunner {
265
358
  existingConfig
266
359
  );
267
360
 
268
- const hfTokenAnswers = await this._runPhase(hfTokenPrompts,
269
- { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
270
- explicitConfig, existingConfig);
271
-
272
- const ngcApiKeyAnswers = await this._runPhase(ngcApiKeyPrompts,
273
- { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
274
- explicitConfig, existingConfig);
275
-
276
- // Validate instance type against framework requirements (now that framework is known)
277
- // Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
278
- const instanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
279
- if (instanceType && frameworkVersionAnswers.frameworkVersion) {
280
- await this._validateAndDisplayInstanceType(
281
- instanceType,
282
- frameworkAnswers.framework,
283
- frameworkVersionAnswers.frameworkVersion
284
- );
285
- }
361
+ // Secret prompts registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
362
+ const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
363
+ const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
364
+ const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
365
+ const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
286
366
 
287
- // CUDA version selection: if the selected instance supports multiple CUDA versions,
288
- // let the user pick which one. This transparently sets the inference AMI version.
289
- const cudaAnswer = await this._promptCudaVersion(
290
- instanceType,
291
- frameworkAnswers.framework,
292
- frameworkVersionAnswers.frameworkVersion
293
- );
294
- if (cudaAnswer) {
295
- infraAnswers._selectedCudaVersion = cudaAnswer.cudaVersion;
296
- infraAnswers._resolvedInferenceAmiVersion = cudaAnswer.inferenceAmiVersion;
297
- }
298
-
299
- // Phase 3: Module Selection
300
- // Requirements: 3.3 — module selection after ML configuration
301
- console.log('\n📦 Module Selection');
367
+ // Module selection
302
368
  const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
303
369
 
304
370
  // Ensure transformers, diffusors, and ineligible Triton backends don't get sample model
@@ -309,8 +375,19 @@ export default class PromptRunner {
309
375
  moduleAnswers.includeSampleModel = false;
310
376
  }
311
377
 
312
- // Phase 4: Project Configuration
313
- // Requirements: 3.4 project configuration last
378
+ // Validate instance type against framework requirements (now that framework version is known)
379
+ const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
380
+ if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
381
+ await this._validateAndDisplayInstanceType(
382
+ finalInstanceType,
383
+ frameworkAnswers.framework,
384
+ frameworkVersionAnswers.frameworkVersion
385
+ );
386
+ }
387
+
388
+ // ══════════════════════════════════════════════════════════════════════
389
+ // Phase 5 — Project (project name + destination)
390
+ // ══════════════════════════════════════════════════════════════════════
314
391
  console.log('\n📋 Project Configuration');
315
392
  const allTechnicalAnswers = {
316
393
  ...frameworkAnswers,
@@ -439,6 +516,21 @@ export default class PromptRunner {
439
516
  delete combinedAnswers.customInstanceType;
440
517
  }
441
518
 
519
+ // Propagate tensor parallelism from instance-sizer to templates
520
+ // Requirements: 4.8 — auto-set TP when sizer recommends > 1
521
+ if (this._autoTensorParallelism) {
522
+ combinedAnswers.tensorParallelSize = this._autoTensorParallelism;
523
+ combinedAnswers.gpuCount = this._autoGpuCount;
524
+ } else if (this._instanceSizerMetadata) {
525
+ const sizerInstanceType = combinedAnswers.instanceType;
526
+ const sizerRecs = this._instanceSizerMetadata.recommendations || [];
527
+ const matchingRec = sizerRecs.find(r => r.instanceType === sizerInstanceType);
528
+ if (matchingRec && matchingRec.tensorParallelism > 1) {
529
+ combinedAnswers.tensorParallelSize = matchingRec.tensorParallelism;
530
+ combinedAnswers.gpuCount = matchingRec.gpuCount;
531
+ }
532
+ }
533
+
442
534
  // Handle custom HyperPod cluster name
443
535
  if (combinedAnswers.customHyperPodCluster) {
444
536
  combinedAnswers.hyperPodCluster = combinedAnswers.customHyperPodCluster;
@@ -623,6 +715,118 @@ export default class PromptRunner {
623
715
  return null;
624
716
  }
625
717
 
718
+ /**
719
+ * Extract CUDA version from the selected base image.
720
+ * Looks at the MCP base image metadata for accelerator.version or labels.cuda_version.
721
+ * @param {object} baseImageAnswers - Answers from the base image prompt
722
+ * @returns {string|null} CUDA version string (e.g., "12.1") or null
723
+ * @private
724
+ */
725
+ _extractCudaFromBaseImage(baseImageAnswers) {
726
+ if (!this._mcpBaseImageChoices) return null;
727
+
728
+ const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
729
+ if (!selectedImage) return null;
730
+
731
+ // Find the matching entry in the MCP choices
732
+ const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
733
+ if (!matchingChoice) return null;
734
+
735
+ // Try to extract CUDA version from the choice metadata
736
+ // The formatImageChoices function stores labels in the choice object
737
+ if (matchingChoice._meta?.labels?.cuda_version) {
738
+ return matchingChoice._meta.labels.cuda_version;
739
+ }
740
+ if (matchingChoice._meta?.accelerator?.version) {
741
+ return matchingChoice._meta.accelerator.version;
742
+ }
743
+
744
+ return null;
745
+ }
746
+
747
+ /**
748
+ * Check model architecture compatibility against the selected base image.
749
+ * Emits an advisory warning if the model's model_type is not in the server's
750
+ * supportedModelTypes. Skips silently if supportedModelTypes is empty (sync not run).
751
+ * Requirements: 4.2, 4.3, 4.4, 4.5
752
+ * @param {Object} baseImageAnswers - Answers from base image selection phase
753
+ * @param {Object} frameworkAnswers - Answers from framework/deployment config phase
754
+ * @private
755
+ */
756
+ _checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers) {
757
+ // Requirement 4.5: skip if no model_type was resolved
758
+ if (!this._modelType) return;
759
+
760
+ // Determine the selected image
761
+ const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
762
+ if (!selectedImage || selectedImage === 'custom') return;
763
+
764
+ // Resolve the matching choice from MCP base image choices
765
+ if (!this._mcpBaseImageChoices) return;
766
+ const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
767
+ if (!matchingChoice) return;
768
+
769
+ // Determine the server name from framework answers
770
+ const server = frameworkAnswers.modelServer || frameworkAnswers.backend;
771
+ if (!server) return;
772
+
773
+ // Load the model-servers catalog to find the entry with supportedModelTypes
774
+ try {
775
+ const catalogPath = path.resolve(GENERATOR_ROOT, 'servers', 'lib', 'catalogs', 'model-servers.json');
776
+ const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
777
+
778
+ const serverEntries = catalog[server];
779
+ if (!Array.isArray(serverEntries)) return;
780
+
781
+ // Find the catalog entry matching the selected image
782
+ const entry = serverEntries.find(e => e.image === selectedImage);
783
+ if (!entry) return;
784
+
785
+ const supported = entry.supportedModelTypes;
786
+ // Requirement 4.5: skip silently when supportedModelTypes is empty (sync not run)
787
+ if (!supported || supported.length === 0) return;
788
+
789
+ // Requirement 4.2-4.3: cross-reference model_type (case-insensitive)
790
+ const modelTypeLower = this._modelType.toLowerCase();
791
+ if (!supported.includes(modelTypeLower)) {
792
+ const version = entry.labels?.framework_version || entry.tag || 'unknown';
793
+ const docsUrls = {
794
+ vllm: 'https://docs.vllm.ai/en/latest/models/supported_models.html',
795
+ sglang: 'https://sgl-project.github.io/references/supported_models.html',
796
+ 'tensorrt-llm': 'https://nvidia.github.io/TensorRT-LLM/reference/support-matrix.html'
797
+ };
798
+ const docsUrl = docsUrls[server] || `https://github.com/search?q=${server}+supported+models`;
799
+
800
+ // Requirement 4.3-4.4: emit advisory warning (does not block generation)
801
+ console.log(`\n ⚠️ Model architecture "${this._modelType}" may not be supported by ${server} ${version}`);
802
+ console.log(' Consider upgrading to a newer base image, or verify compatibility at:');
803
+ console.log(` ${docsUrl}`);
804
+ }
805
+ } catch (err) {
806
+ // Graceful degradation: if catalog can't be read, skip silently
807
+ }
808
+ }
809
+
810
+ /**
811
+ * Get architecture-based heuristic default instance type.
812
+ * Used when the instance-sizer cannot produce a recommendation.
813
+ * Requirements: 3.9, 4.6
814
+ * @param {string} architecture - Model architecture type
815
+ * @returns {string} Default instance type
816
+ * @private
817
+ */
818
+ _getArchitectureHeuristicDefault(architecture) {
819
+ const HEURISTIC_DEFAULTS = {
820
+ 'transformers': 'ml.g5.xlarge',
821
+ 'transformer': 'ml.g5.xlarge',
822
+ 'diffusors': 'ml.g5.2xlarge',
823
+ 'diffusor': 'ml.g5.2xlarge',
824
+ 'predictor': 'ml.m5.large',
825
+ 'http': 'ml.m5.large'
826
+ };
827
+ return Object.hasOwn(HEURISTIC_DEFAULTS, architecture) ? HEURISTIC_DEFAULTS[architecture] : 'ml.g5.xlarge';
828
+ }
829
+
626
830
  /**
627
831
  * Query MCP region-picker server before infrastructure prompts.
628
832
  * Populates configManager.mcpChoices so _runPhase injects them into list prompts.
@@ -671,8 +875,8 @@ export default class PromptRunner {
671
875
  }
672
876
 
673
877
  /**
674
- * Query MCP instance-recommender server after deployment target is known.
675
- * Only runs when deploymentTarget is realtime-inference.
878
+ * Query MCP instance-sizer server with tag-based search after deployment target is known.
879
+ * Used when no model name is available for VRAM-based sizing.
676
880
  * Populates configManager.mcpChoices so _runPhase injects them into list prompts.
677
881
  * @private
678
882
  */
@@ -686,7 +890,7 @@ export default class PromptRunner {
686
890
  const smart = this.options.smart === true;
687
891
 
688
892
  // Instance type: query if not already provided via CLI/config
689
- if (!explicitConfig.instanceType && mcpServers.includes('instance-recommender')) {
893
+ if (!explicitConfig.instanceType && mcpServers.includes('instance-sizer')) {
690
894
  const { instanceSearch } = await this._runPrompts([{
691
895
  type: 'input',
692
896
  name: 'instanceSearch',
@@ -695,8 +899,8 @@ export default class PromptRunner {
695
899
  }]);
696
900
 
697
901
  if (instanceSearch && instanceSearch.trim()) {
698
- console.log(` 🔍 Querying instance-recommender${smart ? ' [smart]' : ''}...`);
699
- const result = await cm.queryMcpServer('instance-recommender', {
902
+ console.log(` 🔍 Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
903
+ const result = await cm.queryMcpServer('instance-sizer', {
700
904
  ...frameworkAnswers,
701
905
  instanceSearch: instanceSearch.trim()
702
906
  });
@@ -713,6 +917,150 @@ export default class PromptRunner {
713
917
  }
714
918
  }
715
919
 
920
+ /**
921
+ * Query the instance-sizer MCP server after model is known.
922
+ * Estimates VRAM requirements and returns filtered, ranked instance recommendations.
923
+ * Stores results in this._mcpInstanceSizerChoices and this._instanceSizerMetadata.
924
+ * Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
925
+ * @param {object} frameworkAnswers - Framework/architecture answers
926
+ * @param {object} modelFormatAnswers - Model format answers (contains modelName)
927
+ * @param {object} explicitConfig - Explicit CLI/config values
928
+ * @param {object} [sizerContext={}] - Additional context for the sizer query
929
+ * @param {string} [sizerContext.cudaVersion] - CUDA version from base image
930
+ * @param {object} [sizerContext.profileEnvVars] - Profile ENV overrides
931
+ * @private
932
+ */
933
+ async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
934
+ const cm = this.configManager;
935
+ if (!cm) return;
936
+
937
+ const mcpServers = cm.getMcpServerNames();
938
+ if (!mcpServers.includes('instance-sizer')) return;
939
+
940
+ // Resolve model name from answers or explicit config
941
+ const modelName = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
942
+ if (!modelName || modelName === 'Custom (enter manually)') return;
943
+
944
+ const smart = this.options.smart === true;
945
+ const discover = this.options.discover === true;
946
+
947
+ const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
948
+ console.log(` 🔍 Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);
949
+
950
+ try {
951
+ const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
952
+ if (!fs.existsSync(mcpConfigPath)) return;
953
+
954
+ const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
955
+ const serverConfig = mcpConfig.mcpServers?.['instance-sizer'];
956
+ if (!serverConfig) return;
957
+
958
+ const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
959
+ const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
960
+
961
+ const serverArgs = [...(serverConfig.args || [])];
962
+ if (discover && !serverArgs.includes('--discover')) {
963
+ serverArgs.push('--discover');
964
+ }
965
+
966
+ const transport = new StdioClientTransport({
967
+ command: serverConfig.command,
968
+ args: serverArgs,
969
+ env: {
970
+ ...process.env,
971
+ ...(serverConfig.env || {}),
972
+ ...(smart ? { BEDROCK_SMART: 'true' } : {})
973
+ },
974
+ stderr: 'pipe'
975
+ });
976
+
977
+ const mcpClient = new Client(
978
+ { name: 'ml-container-creator', version: '1.0.0' },
979
+ { capabilities: {} }
980
+ );
981
+
982
+ await mcpClient.connect(transport);
983
+
984
+ const toolArgs = {
985
+ modelName,
986
+ limit: 10,
987
+ context: {
988
+ architecture: frameworkAnswers.architecture || undefined,
989
+ backend: frameworkAnswers.backend || undefined,
990
+ deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
991
+ profileEnvVars: sizerContext.profileEnvVars || undefined
992
+ }
993
+ };
994
+
995
+ // Add CUDA version from base image for filtering
996
+ if (sizerContext.cudaVersion) {
997
+ toolArgs.cudaVersion = sizerContext.cudaVersion;
998
+ }
999
+
1000
+ // Add quantization if available from model format answers
1001
+ if (modelFormatAnswers.quantization) {
1002
+ toolArgs.quantization = modelFormatAnswers.quantization;
1003
+ }
1004
+
1005
+ const result = await mcpClient.callTool({
1006
+ name: 'get_instance_recommendation',
1007
+ arguments: toolArgs
1008
+ });
1009
+
1010
+ await mcpClient.close();
1011
+
1012
+ // Parse the response
1013
+ const textBlock = result?.content?.find(b => b.type === 'text');
1014
+ if (textBlock) {
1015
+ const parsed = JSON.parse(textBlock.text);
1016
+
1017
+ if (parsed.choices?.instanceType?.length > 0) {
1018
+ this._instanceSizerMetadata = parsed.metadata || null;
1019
+
1020
+ // Build display labels with VRAM estimate and utilization percentage
1021
+ const recommendations = parsed.metadata?.recommendations || [];
1022
+ const estimatedVramGb = parsed.metadata?.estimatedVramGb;
1023
+
1024
+ // Store choices with display labels for the instance prompt
1025
+ this._mcpInstanceSizerChoices = parsed.choices.instanceType;
1026
+ this._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
1027
+ name: rec.displayLabel || `${rec.instanceType} (${estimatedVramGb ? estimatedVramGb.toFixed(1) : '?'}GB / ${rec.totalVramGb || '?'}GB — ${rec.utilizationPercent || '?'}% utilization)`,
1028
+ value: rec.instanceType,
1029
+ short: rec.instanceType
1030
+ }));
1031
+
1032
+ const choices = parsed.choices.instanceType;
1033
+ const topRec = recommendations[0];
1034
+ const vramInfo = estimatedVramGb
1035
+ ? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
1036
+ : '';
1037
+
1038
+ console.log(` ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
1039
+ // Display compact recommendation table
1040
+ for (const rec of recommendations) {
1041
+ const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1042
+ const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1043
+ const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1044
+ console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
1045
+ }
1046
+ } else if (parsed.metadata?.warning) {
1047
+ console.log(` ⚠️ ${parsed.metadata.warning}`);
1048
+ } else {
1049
+ // Apply architecture heuristic fallback when sizer returns empty
1050
+ const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
1051
+ this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
1052
+ console.log(` ↳ No instance-sizer results, using heuristic default: ${this._architectureHeuristicDefault}`);
1053
+ }
1054
+ }
1055
+ } catch (err) {
1056
+ // Sizer unavailable — apply architecture heuristic fallback
1057
+ const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
1058
+ this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
1059
+ console.log(` ⚠️ instance-sizer: ${err.message}`);
1060
+ console.log(` ↳ Using heuristic default: ${this._architectureHeuristicDefault}`);
1061
+ }
1062
+ }
1063
+
716
1064
  /**
717
1065
  * Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
718
1066
  * Populates configManager.mcpChoices.hyperPodCluster so _runPhase injects them into the list prompt.
@@ -1098,6 +1446,12 @@ export default class PromptRunner {
1098
1446
  modelFamily = vals.family;
1099
1447
  }
1100
1448
 
1449
+ // Extract model_type for architecture validation
1450
+ // Requirements: 4.1
1451
+ if (vals.model_type) {
1452
+ this._modelType = vals.model_type;
1453
+ }
1454
+
1101
1455
  // Extract model source metadata for loading adapter
1102
1456
  // Requirements: 2.1, 2.2, 2.3, 2.4
1103
1457
  if (vals.provider) {
@@ -1149,6 +1503,11 @@ export default class PromptRunner {
1149
1503
  if (hfData.chatTemplate) {
1150
1504
  chatTemplate = hfData.chatTemplate;
1151
1505
  }
1506
+ // Extract model_type for architecture validation
1507
+ // Requirements: 4.1
1508
+ if (hfData.modelConfig?.model_type) {
1509
+ this._modelType = hfData.modelConfig.model_type;
1510
+ }
1152
1511
  console.log(' ✅ Found on HuggingFace Hub');
1153
1512
  } else {
1154
1513
  console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
@@ -1276,6 +1635,332 @@ export default class PromptRunner {
1276
1635
  }
1277
1636
  }
1278
1637
 
1638
+ /**
1639
+ * Run secret prompts using the Secret_Classification registry.
1640
+ * For each secret type whose stages apply to the current context:
1641
+ * - Query for managed secrets of that type
1642
+ * - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
1643
+ * - If no managed secrets exist: fall back to existing plaintext prompt
1644
+ *
1645
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
1646
+ * @param {object} previousAnswers - Answers from previous prompt phases
1647
+ * @param {object} explicitConfig - Explicit CLI/config values
1648
+ * @param {object} existingConfig - Existing project configuration
1649
+ * @returns {Promise<object>} Object with token/ARN values keyed by config field names
1650
+ * @private
1651
+ */
1652
+ async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
1653
+ const results = {};
1654
+
1655
+ for (const classification of SECRET_CLASSIFICATIONS) {
1656
+ // Check if this secret type's stages apply to the current context
1657
+ if (!this._secretStagesApply(classification, previousAnswers)) continue;
1658
+
1659
+ // Determine the config keys for this classification
1660
+ const arnConfigKey = this._getArnConfigKey(classification);
1661
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1662
+
1663
+ // Skip if ARN already provided via CLI flag
1664
+ if (explicitConfig[arnConfigKey]) {
1665
+ results[arnConfigKey] = explicitConfig[arnConfigKey];
1666
+ continue;
1667
+ }
1668
+
1669
+ // Skip if plaintext already provided via CLI flag
1670
+ if (explicitConfig[plaintextConfigKey]) {
1671
+ results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
1672
+ continue;
1673
+ }
1674
+
1675
+ // Query for existing managed secrets of this type
1676
+ const managedSecrets = await this._listManagedSecrets(classification.identifier);
1677
+
1678
+ if (managedSecrets.length > 0) {
1679
+ // Show selection list: managed secrets + plaintext entry + skip
1680
+ const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
1681
+ Object.assign(results, answer);
1682
+ } else {
1683
+ // Fall back to existing plaintext prompt
1684
+ const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
1685
+ Object.assign(results, answer);
1686
+ }
1687
+ }
1688
+
1689
+ return results;
1690
+ }
1691
+
1692
+ /**
1693
+ * Determine if a secret classification's stages apply to the current generation context.
1694
+ * Build-time secrets apply when the project involves a Docker build step.
1695
+ * Runtime secrets apply when the architecture uses HuggingFace Hub models.
1696
+ * Requirements: 8.9
1697
+ * @param {object} classification - Secret classification entry
1698
+ * @param {object} answers - Current answers from previous phases
1699
+ * @returns {boolean} True if the secret type is applicable
1700
+ * @private
1701
+ */
1702
+ _secretStagesApply(classification, answers) {
1703
+ const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
1704
+ const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
1705
+
1706
+ if (classification.identifier === 'hf-token') {
1707
+ // HF token applies to transformers, diffusors, and Triton LLM backends
1708
+ const isTransformers = architecture === 'transformers';
1709
+ const isDiffusors = architecture === 'diffusors';
1710
+ const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
1711
+
1712
+ if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
1713
+
1714
+ // Skip for non-HuggingFace model sources
1715
+ const modelSource = answers.modelSource;
1716
+ if (modelSource && modelSource !== 'huggingface') return false;
1717
+
1718
+ return true;
1719
+ }
1720
+
1721
+ if (classification.identifier === 'ngc-token') {
1722
+ // NGC token only applies to transformers-tensorrt-llm (build-time only)
1723
+ if (architecture === 'triton') return false;
1724
+ if (architecture === 'diffusors') return false;
1725
+ return architecture === 'transformers' && backend === 'tensorrt-llm';
1726
+ }
1727
+
1728
+ // For future secret types, check if any stage applies
1729
+ // Build-time applies to all Docker-based deployments
1730
+ // Runtime applies to architectures that download at startup
1731
+ return classification.stages.length > 0;
1732
+ }
1733
+
1734
+ /**
1735
+ * Get the ARN config key for a classification.
1736
+ * Maps classification identifiers to config field names.
1737
+ * @param {object} classification - Secret classification entry
1738
+ * @returns {string} Config key for the ARN value
1739
+ * @private
1740
+ */
1741
+ _getArnConfigKey(classification) {
1742
+ const keyMap = {
1743
+ 'hf-token': 'hfTokenArn',
1744
+ 'ngc-token': 'ngcTokenArn'
1745
+ };
1746
+ return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
1747
+ }
1748
+
1749
+ /**
1750
+ * Get the plaintext config key for a classification.
1751
+ * Maps classification identifiers to config field names.
1752
+ * @param {object} classification - Secret classification entry
1753
+ * @returns {string} Config key for the plaintext value
1754
+ * @private
1755
+ */
1756
+ _getPlaintextConfigKey(classification) {
1757
+ const keyMap = {
1758
+ 'hf-token': 'hfToken',
1759
+ 'ngc-token': 'ngcApiKey'
1760
+ };
1761
+ return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
1762
+ }
1763
+
1764
+ /**
1765
+ * List managed secrets of a given type from AWS Secrets Manager.
1766
+ * Uses the active bootstrap profile to query for secrets tagged with
1767
+ * the mlcc:secret-type matching the given identifier.
1768
+ * @param {string} secretType - The secret type identifier (e.g., 'hf-token')
1769
+ * @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
1770
+ * @private
1771
+ */
1772
+ async _listManagedSecrets(secretType) {
1773
+ try {
1774
+ const bootstrapConfig = new BootstrapConfig();
1775
+ const activeProfile = bootstrapConfig.getActiveProfile();
1776
+ if (!activeProfile) return [];
1777
+
1778
+ const profile = activeProfile.config.awsProfile;
1779
+ const region = activeProfile.config.awsRegion;
1780
+ if (!profile || !region) return [];
1781
+
1782
+ const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
1783
+ const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
1784
+ const trimmed = output.trim();
1785
+ if (!trimmed) return [];
1786
+
1787
+ const result = JSON.parse(trimmed);
1788
+ const secrets = result.SecretList || [];
1789
+
1790
+ // Filter by secret type tag
1791
+ return secrets
1792
+ .filter(secret => {
1793
+ const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
1794
+ return typeTag && typeTag.Value === secretType;
1795
+ })
1796
+ .map(secret => ({
1797
+ name: secret.Name,
1798
+ arn: secret.ARN
1799
+ }));
1800
+ } catch {
1801
+ // If AWS CLI fails (not configured, no credentials, etc.), return empty
1802
+ return [];
1803
+ }
1804
+ }
1805
+
1806
+ /**
1807
+ * Display a selection list for managed secrets of a given type.
1808
+ * Shows available secrets plus options for plaintext entry and skip.
1809
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
1810
+ * @param {object} classification - Secret classification entry
1811
+ * @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
1812
+ * @param {object} previousAnswers - Answers from previous phases
1813
+ * @returns {Promise<object>} Object with the selected value keyed by config field name
1814
+ * @private
1815
+ */
1816
+ async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
1817
+ const arnConfigKey = this._getArnConfigKey(classification);
1818
+
1819
+ console.log(`\n🔐 ${classification.displayName}`);
1820
+ console.log(` ${classification.purpose}`);
1821
+
1822
+ // Build choices: managed secrets + enter plaintext + skip
1823
+ const choices = [
1824
+ ...managedSecrets.map(secret => ({
1825
+ name: `🔒 ${secret.name} (${secret.arn})`,
1826
+ value: secret.arn,
1827
+ short: secret.name
1828
+ })),
1829
+ { name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
1830
+ { name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
1831
+ ];
1832
+
1833
+ const { secretSelection } = await this._runPrompts([{
1834
+ type: 'list',
1835
+ name: 'secretSelection',
1836
+ message: `Select ${classification.promptLabel}:`,
1837
+ choices
1838
+ }]);
1839
+
1840
+ if (secretSelection === '__skip__') {
1841
+ return {};
1842
+ }
1843
+
1844
+ if (secretSelection === '__plaintext__') {
1845
+ // Use existing plaintext flow
1846
+ return this._promptPlaintextEntry(classification, previousAnswers);
1847
+ }
1848
+
1849
+ // User selected a managed secret ARN
1850
+ return { [arnConfigKey]: secretSelection };
1851
+ }
1852
+
1853
+ /**
1854
+ * Prompt for plaintext token entry with ARN detection.
1855
+ * If the user enters an ARN, store it as an ARN reference.
1856
+ * Requirements: 8.4, 8.5, 8.6
1857
+ * @param {object} classification - Secret classification entry
1858
+ * @param {object} previousAnswers - Answers from previous phases
1859
+ * @returns {Promise<object>} Object with the value keyed by config field name
1860
+ * @private
1861
+ */
1862
+ async _promptPlaintextEntry(classification, _previousAnswers) {
1863
+ const arnConfigKey = this._getArnConfigKey(classification);
1864
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1865
+
1866
+ const { tokenValue } = await this._runPrompts([{
1867
+ type: 'input',
1868
+ name: 'tokenValue',
1869
+ message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
1870
+ validate: (input) => {
1871
+ // Empty is valid
1872
+ if (!input || input.trim() === '') return true;
1873
+ // Environment variable reference is valid
1874
+ if (input.trim().startsWith('$')) return true;
1875
+ return true;
1876
+ }
1877
+ }]);
1878
+
1879
+ if (!tokenValue || tokenValue.trim() === '') {
1880
+ return {};
1881
+ }
1882
+
1883
+ const value = tokenValue.trim();
1884
+
1885
+ // ARN detection: if the value is a Secrets Manager ARN, store as ARN
1886
+ if (isSecretsManagerArn(value)) {
1887
+ return { [arnConfigKey]: value };
1888
+ }
1889
+
1890
+ // Otherwise store as plaintext
1891
+ return { [plaintextConfigKey]: value };
1892
+ }
1893
+
1894
+ /**
1895
+ * Fall back to existing plaintext prompt when no managed secrets exist.
1896
+ * Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
1897
+ * but with ARN detection on the input.
1898
+ * Requirements: 8.7
1899
+ * @param {object} classification - Secret classification entry
1900
+ * @param {object} previousAnswers - Answers from previous phases
1901
+ * @param {object} explicitConfig - Explicit CLI/config values
1902
+ * @param {object} existingConfig - Existing project configuration
1903
+ * @returns {Promise<object>} Object with the value keyed by config field name
1904
+ * @private
1905
+ */
1906
+ async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
1907
+ const arnConfigKey = this._getArnConfigKey(classification);
1908
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1909
+
1910
+ // If in auto-prompt mode, skip
1911
+ if (this.configManager?.isAutoPrompt()) {
1912
+ return {};
1913
+ }
1914
+
1915
+ // Display context-appropriate security message
1916
+ if (classification.identifier === 'hf-token') {
1917
+ console.log('\n🔐 HuggingFace Authentication');
1918
+ console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
1919
+ console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
1920
+ console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
1921
+ console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
1922
+ } else if (classification.identifier === 'ngc-token') {
1923
+ console.log('\n🔐 NVIDIA NGC Authentication');
1924
+ console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
1925
+ console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
1926
+ console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
1927
+ console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
1928
+ } else {
1929
+ console.log(`\n🔐 ${classification.displayName}`);
1930
+ console.log(` ${classification.purpose}\n`);
1931
+ }
1932
+
1933
+ const { tokenValue } = await this._runPrompts([{
1934
+ type: 'input',
1935
+ name: 'tokenValue',
1936
+ message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
1937
+ validate: (input) => {
1938
+ if (!input || input.trim() === '') return true;
1939
+ if (input.trim().startsWith('$')) return true;
1940
+ // Warn about HF token format
1941
+ if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
1942
+ console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
1943
+ console.warn(' If this is intentional, you can ignore this warning.');
1944
+ }
1945
+ return true;
1946
+ }
1947
+ }]);
1948
+
1949
+ if (!tokenValue || tokenValue.trim() === '') {
1950
+ return {};
1951
+ }
1952
+
1953
+ const value = tokenValue.trim();
1954
+
1955
+ // ARN detection: if the value is a Secrets Manager ARN, store as ARN
1956
+ if (isSecretsManagerArn(value)) {
1957
+ return { [arnConfigKey]: value };
1958
+ }
1959
+
1960
+ // Otherwise store as plaintext
1961
+ return { [plaintextConfigKey]: value };
1962
+ }
1963
+
1279
1964
  /**
1280
1965
  * CUDA-to-AMI mapping.
1281
1966
  * Maps CUDA major.minor versions to the SageMaker inference AMI that provides
@@ -1283,13 +1968,13 @@ export default class PromptRunner {
1283
1968
  * @private
1284
1969
  */
1285
1970
  static CUDA_AMI_MAP = {
1286
- '11.0': 'al2-ami-sagemaker-inference-gpu-2-1',
1971
+ '11.0': 'al2-ami-sagemaker-inference-gpu-2',
1287
1972
  '11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
1288
- '11.8': 'al2-ami-sagemaker-inference-gpu-3-1',
1973
+ '11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
1289
1974
  '12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
1290
- '12.2': 'al2-ami-sagemaker-inference-gpu-3-2',
1291
- '12.4': 'al2-ami-sagemaker-inference-gpu-3-2',
1292
- '12.6': 'al2-ami-sagemaker-inference-gpu-3-2'
1975
+ '12.2': 'al2023-ami-sagemaker-inference-gpu-4-1',
1976
+ '12.4': 'al2023-ami-sagemaker-inference-gpu-4-1',
1977
+ '12.6': 'al2023-ami-sagemaker-inference-gpu-4-1'
1293
1978
  };
1294
1979
 
1295
1980
  /**
@@ -1297,16 +1982,21 @@ export default class PromptRunner {
1297
1982
  * supports multiple versions. The choice transparently resolves to the
1298
1983
  * correct SageMaker inference AMI.
1299
1984
  *
1985
+ * When a base image CUDA version is provided, auto-resolves by intersecting
1986
+ * with the instance's supported versions. Removes the CUDA prompt from the
1987
+ * interactive flow when auto-resolution succeeds.
1988
+ *
1300
1989
  * Skipped for CPU instances, non-CUDA accelerators, or when only one
1301
1990
  * compatible CUDA version exists.
1302
1991
  *
1303
1992
  * @param {string} instanceType - Selected instance type (e.g. "ml.g5.2xlarge")
1304
1993
  * @param {string} framework - Selected framework name
1305
1994
  * @param {string} frameworkVersion - Selected framework version
1995
+ * @param {string} [baseImageCuda] - CUDA version from selected base image (for auto-resolution)
1306
1996
  * @returns {Promise<{cudaVersion: string, inferenceAmiVersion: string}|null>}
1307
1997
  * @private
1308
1998
  */
1309
- async _promptCudaVersion(instanceType, framework, frameworkVersion) {
1999
+ async _promptCudaVersion(instanceType, framework, frameworkVersion, baseImageCuda) {
1310
2000
  if (!instanceType) return null;
1311
2001
 
1312
2002
  // Look up instance in accelerator mapping
@@ -1316,6 +2006,33 @@ export default class PromptRunner {
1316
2006
  const instanceCudaVersions = instanceInfo.accelerator.versions;
1317
2007
  if (!instanceCudaVersions || instanceCudaVersions.length === 0) return null;
1318
2008
 
2009
+ // Auto-resolution: when base image specifies a CUDA version, intersect with instance support
2010
+ // Requirements: 3.11, 4.9, 4.10, 4.11
2011
+ if (baseImageCuda) {
2012
+ const majorRequired = baseImageCuda.split('.')[0];
2013
+ const intersection = instanceCudaVersions.filter(v => {
2014
+ if (v === baseImageCuda) return true;
2015
+ if (v.startsWith(`${majorRequired }.`)) return true;
2016
+ return false;
2017
+ });
2018
+
2019
+ if (intersection.length > 0) {
2020
+ // Auto-select: pick exact match or highest compatible
2021
+ const exactMatch = intersection.find(v => v === baseImageCuda);
2022
+ const selectedVersion = exactMatch || intersection.sort().pop();
2023
+ const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[selectedVersion];
2024
+ if (inferenceAmiVersion) {
2025
+ console.log(`\n🔧 CUDA ${selectedVersion} auto-resolved from base image (requires ${baseImageCuda})`);
2026
+ console.log(` AMI: ${inferenceAmiVersion}`);
2027
+ return { cudaVersion: selectedVersion, inferenceAmiVersion };
2028
+ }
2029
+ } else {
2030
+ // No intersection — warn and fall through to manual prompt
2031
+ console.log(`\n ⚠️ Base image requires CUDA ${baseImageCuda} but instance ${instanceType} supports: ${instanceCudaVersions.join(', ')}`);
2032
+ console.log(' No compatible CUDA version found. Falling back to manual selection.');
2033
+ }
2034
+ }
2035
+
1319
2036
  // Get framework CUDA requirements (if available)
1320
2037
  const registryConfigManager = this.registryConfigManager;
1321
2038
  const frameworkConfig = registryConfigManager?.frameworkRegistry?.[framework]?.[frameworkVersion];