@aws/ml-container-creator 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/bin/cli.js +38 -2
  2. package/config/bootstrap-stack.json +94 -1
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/package-lock.json +22 -9
  5. package/package.json +3 -1
  6. package/servers/instance-sizer/index.js +45 -8
  7. package/servers/instance-sizer/lib/instance-ranker.js +140 -11
  8. package/servers/instance-sizer/lib/model-resolver.js +10 -6
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +298 -20
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +101 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +15 -1
  16. package/servers/model-picker/index.js +2 -1
  17. package/src/app.js +96 -2
  18. package/src/lib/architecture-sync.js +171 -0
  19. package/src/lib/arn-detection.js +22 -0
  20. package/src/lib/bootstrap-command-handler.js +178 -3
  21. package/src/lib/cli-handler.js +2 -2
  22. package/src/lib/config-manager.js +121 -1
  23. package/src/lib/cross-cutting-checker.js +119 -0
  24. package/src/lib/deployment-entry-schema.js +1 -2
  25. package/src/lib/prompt-runner.js +514 -20
  26. package/src/lib/prompts.js +67 -5
  27. package/src/lib/registry-command-handler.js +236 -0
  28. package/src/lib/schema-sync.js +31 -0
  29. package/src/lib/secret-classification.js +56 -0
  30. package/src/lib/secrets-command-handler.js +550 -0
  31. package/src/lib/template-manager.js +49 -1
  32. package/src/lib/validate-runner.js +174 -2
  33. package/src/lib/validation-report.js +8 -1
  34. package/src/prompt-adapter.js +3 -2
  35. package/templates/Dockerfile +10 -2
  36. package/templates/code/cuda_compat.sh +22 -0
  37. package/templates/code/serve +3 -0
  38. package/templates/code/start_server.sh +3 -0
  39. package/templates/diffusors/Dockerfile +2 -1
  40. package/templates/diffusors/serve +3 -0
  41. package/templates/do/README.md +33 -0
  42. package/templates/do/benchmark +646 -0
  43. package/templates/do/build +22 -0
  44. package/templates/do/clean +86 -0
  45. package/templates/do/config +41 -6
  46. package/templates/do/deploy +66 -6
  47. package/templates/do/logs +18 -3
  48. package/templates/do/register +8 -1
  49. package/templates/do/run +10 -0
  50. package/templates/triton/Dockerfile +5 -0
@@ -17,9 +17,8 @@ import {
17
17
  modelServerPrompts,
18
18
  modelLoadStrategyPrompts,
19
19
  modelProfilePrompts,
20
- hfTokenPrompts,
21
- ngcApiKeyPrompts,
22
20
  modulePrompts,
21
+ benchmarkPrompts,
23
22
  infraRegionAndTargetPrompts,
24
23
  infraInstancePrompts,
25
24
  infraAsyncPrompts,
@@ -35,9 +34,13 @@ import {
35
34
 
36
35
  import fs from 'fs';
37
36
  import path from 'path';
37
+ import { execSync } from 'node:child_process';
38
38
  import { fileURLToPath } from 'node:url';
39
39
  import RegistryLoader from './registry-loader.js';
40
40
  import { runPrompts } from '../prompt-adapter.js';
41
+ import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
42
+ import { isSecretsManagerArn } from './arn-detection.js';
43
+ import BootstrapConfig from './bootstrap-config.js';
41
44
 
42
45
  const __pr_filename = fileURLToPath(import.meta.url);
43
46
  const __pr_dirname = path.dirname(__pr_filename);
@@ -170,6 +173,9 @@ export default class PromptRunner {
170
173
  existingConfig
171
174
  );
172
175
 
176
+ // Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
177
+ this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
178
+
173
179
  // Extract CUDA version from selected base image for instance-sizer context
174
180
  const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
175
181
 
@@ -247,6 +253,29 @@ export default class PromptRunner {
247
253
  this._autoGpuCount = tpRec.gpuCount;
248
254
  console.log(` ✓ Auto-set tensor parallelism: TP=${tpRec.tensorParallelism} (${tpRec.gpuCount} GPUs)`);
249
255
  }
256
+
257
+ // Display capacity type confirmation for selected instance
258
+ // Requirements: 5.4
259
+ if (matchingRec && matchingRec.capacityType) {
260
+ if (matchingRec.capacityType === 'reserved') {
261
+ const resType = matchingRec.reservationType === 'capacity-block' ? 'Capacity Block' : 'ODCR';
262
+ const endInfo = matchingRec.reservationType === 'capacity-block' && matchingRec.reservationInfo?.endDate
263
+ ? `, ends ${new Date(matchingRec.reservationInfo.endDate).toLocaleDateString()}`
264
+ : '';
265
+ console.log(` ✓ Using reserved capacity — ${resType} (reservation ${matchingRec.reservationInfo?.reservationId || 'unknown'}${endInfo})`);
266
+ } else if (matchingRec.capacityType === 'ftp') {
267
+ console.log(` ✓ Using reserved capacity (plan ${matchingRec.ftpInfo?.planName || 'unknown'})`);
268
+ } else {
269
+ const headroom = matchingRec.quotaHeadroom;
270
+ console.log(` ✓ Using on-demand capacity (quota headroom: ${headroom ?? 'unknown'})`);
271
+ }
272
+ }
273
+
274
+ // Extract reservation ARN from selected instance for deployment config
275
+ // Requirements: 2.3
276
+ if (matchingRec && matchingRec.capacityType === 'reserved' && matchingRec.reservationInfo?.reservationArn) {
277
+ this._selectedCapacityReservationArn = matchingRec.reservationInfo.reservationArn;
278
+ }
250
279
  }
251
280
 
252
281
  // 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
@@ -353,13 +382,11 @@ export default class PromptRunner {
353
382
  existingConfig
354
383
  );
355
384
 
356
- const hfTokenAnswers = await this._runPhase(hfTokenPrompts,
357
- { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
358
- explicitConfig, existingConfig);
359
-
360
- const ngcApiKeyAnswers = await this._runPhase(ngcApiKeyPrompts,
361
- { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
362
- explicitConfig, existingConfig);
385
+ // Secret prompts — registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
386
+ const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
387
+ const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
388
+ const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
389
+ const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
363
390
 
364
391
  // Module selection
365
392
  const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
@@ -372,6 +399,21 @@ export default class PromptRunner {
372
399
  moduleAnswers.includeSampleModel = false;
373
400
  }
374
401
 
402
+ // Benchmark prompts — derive includeBenchmark from testTypes selection or CLI flag
403
+ // Requirements: 1.1, 1.2
404
+ let benchmarkAnswers = {};
405
+ if (frameworkAnswers.architecture === 'transformers' || frameworkAnswers.architecture === 'diffusors') {
406
+ const testTypes = moduleAnswers.testTypes || [];
407
+ const includeBenchmark = testTypes.includes('sagemaker-ai-automated-benchmarking') ||
408
+ explicitConfig.includeBenchmark === true ||
409
+ explicitConfig.includeBenchmark === 'true';
410
+ benchmarkAnswers.includeBenchmark = includeBenchmark;
411
+ if (includeBenchmark) {
412
+ const subAnswers = await this._runPhase(benchmarkPrompts, { ...frameworkAnswers, ...moduleAnswers, includeBenchmark }, explicitConfig, existingConfig);
413
+ benchmarkAnswers = { ...benchmarkAnswers, ...subAnswers };
414
+ }
415
+ }
416
+
375
417
  // Validate instance type against framework requirements (now that framework version is known)
376
418
  const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
377
419
  if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
@@ -413,6 +455,7 @@ export default class PromptRunner {
413
455
  ...hfTokenAnswers,
414
456
  ...ngcApiKeyAnswers,
415
457
  ...moduleAnswers,
458
+ ...benchmarkAnswers,
416
459
  ...projectAnswers,
417
460
  ...destinationAnswers,
418
461
  buildTimestamp
@@ -432,6 +475,12 @@ export default class PromptRunner {
432
475
  combinedAnswers.artifactUri = this._mcpArtifactUri;
433
476
  }
434
477
 
478
+ // Flow capacity reservation ARN from instance-sizer selection
479
+ // Requirements: 2.3
480
+ if (this._selectedCapacityReservationArn) {
481
+ combinedAnswers.capacityReservationArn = this._selectedCapacityReservationArn;
482
+ }
483
+
435
484
  // Validate: non-HF model sources require an artifact URI
436
485
  // Without it, the serve script can't download the model at runtime
437
486
  // Infer modelSource from model name prefix if not set by MCP
@@ -741,6 +790,69 @@ export default class PromptRunner {
741
790
  return null;
742
791
  }
743
792
 
793
+ /**
794
+ * Check model architecture compatibility against the selected base image.
795
+ * Emits an advisory warning if the model's model_type is not in the server's
796
+ * supportedModelTypes. Skips silently if supportedModelTypes is empty (sync not run).
797
+ * Requirements: 4.2, 4.3, 4.4, 4.5
798
+ * @param {Object} baseImageAnswers - Answers from base image selection phase
799
+ * @param {Object} frameworkAnswers - Answers from framework/deployment config phase
800
+ * @private
801
+ */
802
+ _checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers) {
803
+ // Requirement 4.5: skip if no model_type was resolved
804
+ if (!this._modelType) return;
805
+
806
+ // Determine the selected image
807
+ const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
808
+ if (!selectedImage || selectedImage === 'custom') return;
809
+
810
+ // Resolve the matching choice from MCP base image choices
811
+ if (!this._mcpBaseImageChoices) return;
812
+ const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
813
+ if (!matchingChoice) return;
814
+
815
+ // Determine the server name from framework answers
816
+ const server = frameworkAnswers.modelServer || frameworkAnswers.backend;
817
+ if (!server) return;
818
+
819
+ // Load the model-servers catalog to find the entry with supportedModelTypes
820
+ try {
821
+ const catalogPath = path.resolve(GENERATOR_ROOT, 'servers', 'lib', 'catalogs', 'model-servers.json');
822
+ const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
823
+
824
+ const serverEntries = catalog[server];
825
+ if (!Array.isArray(serverEntries)) return;
826
+
827
+ // Find the catalog entry matching the selected image
828
+ const entry = serverEntries.find(e => e.image === selectedImage);
829
+ if (!entry) return;
830
+
831
+ const supported = entry.supportedModelTypes;
832
+ // Requirement 4.5: skip silently when supportedModelTypes is empty (sync not run)
833
+ if (!supported || supported.length === 0) return;
834
+
835
+ // Requirement 4.2-4.3: cross-reference model_type (case-insensitive)
836
+ const modelTypeLower = this._modelType.toLowerCase();
837
+ if (!supported.includes(modelTypeLower)) {
838
+ const version = entry.labels?.framework_version || entry.tag || 'unknown';
839
+ const docsUrls = {
840
+ vllm: 'https://docs.vllm.ai/en/latest/models/supported_models.html',
841
+ sglang: 'https://sgl-project.github.io/references/supported_models.html',
842
+ 'tensorrt-llm': 'https://nvidia.github.io/TensorRT-LLM/reference/support-matrix.html'
843
+ };
844
+ const docsUrl = docsUrls[server] || `https://github.com/search?q=${server}+supported+models`;
845
+
846
+ // Requirement 4.3-4.4: emit advisory warning (does not block generation)
847
+ console.log(`\n ⚠️ Model architecture "${this._modelType}" may not be supported by ${server} ${version}`);
848
+ console.log(' Consider upgrading to a newer base image, or verify compatibility at:');
849
+ console.log(` ${docsUrl}`);
850
+ }
851
+ } catch (err) {
852
+ // Graceful degradation: if catalog can't be read, skip silently
853
+ }
854
+ }
855
+
744
856
  /**
745
857
  * Get architecture-based heuristic default instance type.
746
858
  * Used when the instance-sizer cannot produce a recommendation.
@@ -917,7 +1029,7 @@ export default class PromptRunner {
917
1029
 
918
1030
  const toolArgs = {
919
1031
  modelName,
920
- limit: 8,
1032
+ limit: 10,
921
1033
  context: {
922
1034
  architecture: frameworkAnswers.architecture || undefined,
923
1035
  backend: frameworkAnswers.backend || undefined,
@@ -966,13 +1078,57 @@ export default class PromptRunner {
966
1078
  const choices = parsed.choices.instanceType;
967
1079
  const topRec = recommendations[0];
968
1080
  const vramInfo = estimatedVramGb
969
- ? ` (VRAM: ${estimatedVramGb.toFixed(1)}GB)`
970
- : '';
971
- const tpInfo = topRec?.tensorParallelism > 1
972
- ? ` [TP=${topRec.tensorParallelism}]`
1081
+ ? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
973
1082
  : '';
974
1083
 
975
- console.log(` ✓ ${choices.length} sized instance(s): ${choices[0]}${vramInfo}${tpInfo}`);
1084
+ console.log(` ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
1085
+
1086
+ // Check if availability data is present (recommendations have capacityType)
1087
+ const hasAvailabilityData = recommendations.some(r => r.capacityType);
1088
+
1089
+ if (hasAvailabilityData) {
1090
+ // Group by capacityType for display
1091
+ const reserved = recommendations.filter(r => r.capacityType === 'reserved' || r.capacityType === 'ftp');
1092
+ const onDemand = recommendations.filter(r => r.capacityType === 'on-demand');
1093
+
1094
+ if (reserved.length > 0) {
1095
+ console.log(' ── Reserved Capacity ──');
1096
+ for (const rec of reserved) {
1097
+ const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1098
+ const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1099
+ const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1100
+ const tag = rec.capacityType === 'reserved'
1101
+ ? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
1102
+ : ` [FTP] ${rec.ftpInfo?.planName || ''}`;
1103
+ console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
1104
+ }
1105
+ }
1106
+
1107
+ if (onDemand.length > 0) {
1108
+ console.log(' ── On-Demand ──');
1109
+ for (const rec of onDemand) {
1110
+ const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1111
+ const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1112
+ const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1113
+ const deployed = rec.quotaDeployed;
1114
+ const quota = rec.quotaLimit;
1115
+ const tag = quota !== null && quota !== undefined ? ` [Q:${deployed ?? 0}/${quota}]` : '';
1116
+ console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
1117
+ }
1118
+ }
1119
+ } else {
1120
+ // Fallback: display compact recommendation table (no availability data)
1121
+ for (const rec of recommendations) {
1122
+ const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1123
+ const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1124
+ const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1125
+ console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
1126
+ }
1127
+ }
1128
+ } else if (parsed.metadata?.allFilteredByQuota) {
1129
+ // All VRAM-compatible instances had zero quota
1130
+ console.log(' ⚠️ No quota available for compatible instances. Request a quota increase.');
1131
+ this._instanceSizerMetadata = parsed.metadata || null;
976
1132
  } else if (parsed.metadata?.warning) {
977
1133
  console.log(` ⚠️ ${parsed.metadata.warning}`);
978
1134
  } else {
@@ -1376,6 +1532,12 @@ export default class PromptRunner {
1376
1532
  modelFamily = vals.family;
1377
1533
  }
1378
1534
 
1535
+ // Extract model_type for architecture validation
1536
+ // Requirements: 4.1
1537
+ if (vals.model_type) {
1538
+ this._modelType = vals.model_type;
1539
+ }
1540
+
1379
1541
  // Extract model source metadata for loading adapter
1380
1542
  // Requirements: 2.1, 2.2, 2.3, 2.4
1381
1543
  if (vals.provider) {
@@ -1427,6 +1589,11 @@ export default class PromptRunner {
1427
1589
  if (hfData.chatTemplate) {
1428
1590
  chatTemplate = hfData.chatTemplate;
1429
1591
  }
1592
+ // Extract model_type for architecture validation
1593
+ // Requirements: 4.1
1594
+ if (hfData.modelConfig?.model_type) {
1595
+ this._modelType = hfData.modelConfig.model_type;
1596
+ }
1430
1597
  console.log(' ✅ Found on HuggingFace Hub');
1431
1598
  } else {
1432
1599
  console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
@@ -1554,6 +1721,332 @@ export default class PromptRunner {
1554
1721
  }
1555
1722
  }
1556
1723
 
1724
+ /**
1725
+ * Run secret prompts using the Secret_Classification registry.
1726
+ * For each secret type whose stages apply to the current context:
1727
+ * - Query for managed secrets of that type
1728
+ * - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
1729
+ * - If no managed secrets exist: fall back to existing plaintext prompt
1730
+ *
1731
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
1732
+ * @param {object} previousAnswers - Answers from previous prompt phases
1733
+ * @param {object} explicitConfig - Explicit CLI/config values
1734
+ * @param {object} existingConfig - Existing project configuration
1735
+ * @returns {Promise<object>} Object with token/ARN values keyed by config field names
1736
+ * @private
1737
+ */
1738
+ async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
1739
+ const results = {};
1740
+
1741
+ for (const classification of SECRET_CLASSIFICATIONS) {
1742
+ // Check if this secret type's stages apply to the current context
1743
+ if (!this._secretStagesApply(classification, previousAnswers)) continue;
1744
+
1745
+ // Determine the config keys for this classification
1746
+ const arnConfigKey = this._getArnConfigKey(classification);
1747
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1748
+
1749
+ // Skip if ARN already provided via CLI flag
1750
+ if (explicitConfig[arnConfigKey]) {
1751
+ results[arnConfigKey] = explicitConfig[arnConfigKey];
1752
+ continue;
1753
+ }
1754
+
1755
+ // Skip if plaintext already provided via CLI flag
1756
+ if (explicitConfig[plaintextConfigKey]) {
1757
+ results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
1758
+ continue;
1759
+ }
1760
+
1761
+ // Query for existing managed secrets of this type
1762
+ const managedSecrets = await this._listManagedSecrets(classification.identifier);
1763
+
1764
+ if (managedSecrets.length > 0) {
1765
+ // Show selection list: managed secrets + plaintext entry + skip
1766
+ const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
1767
+ Object.assign(results, answer);
1768
+ } else {
1769
+ // Fall back to existing plaintext prompt
1770
+ const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
1771
+ Object.assign(results, answer);
1772
+ }
1773
+ }
1774
+
1775
+ return results;
1776
+ }
1777
+
1778
+ /**
1779
+ * Determine if a secret classification's stages apply to the current generation context.
1780
+ * Build-time secrets apply when the project involves a Docker build step.
1781
+ * Runtime secrets apply when the architecture uses HuggingFace Hub models.
1782
+ * Requirements: 8.9
1783
+ * @param {object} classification - Secret classification entry
1784
+ * @param {object} answers - Current answers from previous phases
1785
+ * @returns {boolean} True if the secret type is applicable
1786
+ * @private
1787
+ */
1788
+ _secretStagesApply(classification, answers) {
1789
+ const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
1790
+ const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
1791
+
1792
+ if (classification.identifier === 'hf-token') {
1793
+ // HF token applies to transformers, diffusors, and Triton LLM backends
1794
+ const isTransformers = architecture === 'transformers';
1795
+ const isDiffusors = architecture === 'diffusors';
1796
+ const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
1797
+
1798
+ if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
1799
+
1800
+ // Skip for non-HuggingFace model sources
1801
+ const modelSource = answers.modelSource;
1802
+ if (modelSource && modelSource !== 'huggingface') return false;
1803
+
1804
+ return true;
1805
+ }
1806
+
1807
+ if (classification.identifier === 'ngc-token') {
1808
+ // NGC token only applies to transformers-tensorrt-llm (build-time only)
1809
+ if (architecture === 'triton') return false;
1810
+ if (architecture === 'diffusors') return false;
1811
+ return architecture === 'transformers' && backend === 'tensorrt-llm';
1812
+ }
1813
+
1814
+ // For future secret types, check if any stage applies
1815
+ // Build-time applies to all Docker-based deployments
1816
+ // Runtime applies to architectures that download at startup
1817
+ return classification.stages.length > 0;
1818
+ }
1819
+
1820
+ /**
1821
+ * Get the ARN config key for a classification.
1822
+ * Maps classification identifiers to config field names.
1823
+ * @param {object} classification - Secret classification entry
1824
+ * @returns {string} Config key for the ARN value
1825
+ * @private
1826
+ */
1827
+ _getArnConfigKey(classification) {
1828
+ const keyMap = {
1829
+ 'hf-token': 'hfTokenArn',
1830
+ 'ngc-token': 'ngcTokenArn'
1831
+ };
1832
+ return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
1833
+ }
1834
+
1835
+ /**
1836
+ * Get the plaintext config key for a classification.
1837
+ * Maps classification identifiers to config field names.
1838
+ * @param {object} classification - Secret classification entry
1839
+ * @returns {string} Config key for the plaintext value
1840
+ * @private
1841
+ */
1842
+ _getPlaintextConfigKey(classification) {
1843
+ const keyMap = {
1844
+ 'hf-token': 'hfToken',
1845
+ 'ngc-token': 'ngcApiKey'
1846
+ };
1847
+ return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
1848
+ }
1849
+
1850
+ /**
1851
+ * List managed secrets of a given type from AWS Secrets Manager.
1852
+ * Uses the active bootstrap profile to query for secrets tagged with
1853
+ * the mlcc:secret-type matching the given identifier.
1854
+ * @param {string} secretType - The secret type identifier (e.g., 'hf-token')
1855
+ * @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
1856
+ * @private
1857
+ */
1858
+ async _listManagedSecrets(secretType) {
1859
+ try {
1860
+ const bootstrapConfig = new BootstrapConfig();
1861
+ const activeProfile = bootstrapConfig.getActiveProfile();
1862
+ if (!activeProfile) return [];
1863
+
1864
+ const profile = activeProfile.config.awsProfile;
1865
+ const region = activeProfile.config.awsRegion;
1866
+ if (!profile || !region) return [];
1867
+
1868
+ const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
1869
+ const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
1870
+ const trimmed = output.trim();
1871
+ if (!trimmed) return [];
1872
+
1873
+ const result = JSON.parse(trimmed);
1874
+ const secrets = result.SecretList || [];
1875
+
1876
+ // Filter by secret type tag
1877
+ return secrets
1878
+ .filter(secret => {
1879
+ const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
1880
+ return typeTag && typeTag.Value === secretType;
1881
+ })
1882
+ .map(secret => ({
1883
+ name: secret.Name,
1884
+ arn: secret.ARN
1885
+ }));
1886
+ } catch {
1887
+ // If AWS CLI fails (not configured, no credentials, etc.), return empty
1888
+ return [];
1889
+ }
1890
+ }
1891
+
1892
+ /**
1893
+ * Display a selection list for managed secrets of a given type.
1894
+ * Shows available secrets plus options for plaintext entry and skip.
1895
+ * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
1896
+ * @param {object} classification - Secret classification entry
1897
+ * @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
1898
+ * @param {object} previousAnswers - Answers from previous phases
1899
+ * @returns {Promise<object>} Object with the selected value keyed by config field name
1900
+ * @private
1901
+ */
1902
+ async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
1903
+ const arnConfigKey = this._getArnConfigKey(classification);
1904
+
1905
+ console.log(`\n🔐 ${classification.displayName}`);
1906
+ console.log(` ${classification.purpose}`);
1907
+
1908
+ // Build choices: managed secrets + enter plaintext + skip
1909
+ const choices = [
1910
+ ...managedSecrets.map(secret => ({
1911
+ name: `🔒 ${secret.name} (${secret.arn})`,
1912
+ value: secret.arn,
1913
+ short: secret.name
1914
+ })),
1915
+ { name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
1916
+ { name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
1917
+ ];
1918
+
1919
+ const { secretSelection } = await this._runPrompts([{
1920
+ type: 'list',
1921
+ name: 'secretSelection',
1922
+ message: `Select ${classification.promptLabel}:`,
1923
+ choices
1924
+ }]);
1925
+
1926
+ if (secretSelection === '__skip__') {
1927
+ return {};
1928
+ }
1929
+
1930
+ if (secretSelection === '__plaintext__') {
1931
+ // Use existing plaintext flow
1932
+ return this._promptPlaintextEntry(classification, previousAnswers);
1933
+ }
1934
+
1935
+ // User selected a managed secret ARN
1936
+ return { [arnConfigKey]: secretSelection };
1937
+ }
1938
+
1939
+ /**
1940
+ * Prompt for plaintext token entry with ARN detection.
1941
+ * If the user enters an ARN, store it as an ARN reference.
1942
+ * Requirements: 8.4, 8.5, 8.6
1943
+ * @param {object} classification - Secret classification entry
1944
+ * @param {object} previousAnswers - Answers from previous phases
1945
+ * @returns {Promise<object>} Object with the value keyed by config field name
1946
+ * @private
1947
+ */
1948
+ async _promptPlaintextEntry(classification, _previousAnswers) {
1949
+ const arnConfigKey = this._getArnConfigKey(classification);
1950
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1951
+
1952
+ const { tokenValue } = await this._runPrompts([{
1953
+ type: 'input',
1954
+ name: 'tokenValue',
1955
+ message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
1956
+ validate: (input) => {
1957
+ // Empty is valid
1958
+ if (!input || input.trim() === '') return true;
1959
+ // Environment variable reference is valid
1960
+ if (input.trim().startsWith('$')) return true;
1961
+ return true;
1962
+ }
1963
+ }]);
1964
+
1965
+ if (!tokenValue || tokenValue.trim() === '') {
1966
+ return {};
1967
+ }
1968
+
1969
+ const value = tokenValue.trim();
1970
+
1971
+ // ARN detection: if the value is a Secrets Manager ARN, store as ARN
1972
+ if (isSecretsManagerArn(value)) {
1973
+ return { [arnConfigKey]: value };
1974
+ }
1975
+
1976
+ // Otherwise store as plaintext
1977
+ return { [plaintextConfigKey]: value };
1978
+ }
1979
+
1980
+ /**
1981
+ * Fall back to existing plaintext prompt when no managed secrets exist.
1982
+ * Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
1983
+ * but with ARN detection on the input.
1984
+ * Requirements: 8.7
1985
+ * @param {object} classification - Secret classification entry
1986
+ * @param {object} previousAnswers - Answers from previous phases
1987
+ * @param {object} explicitConfig - Explicit CLI/config values
1988
+ * @param {object} existingConfig - Existing project configuration
1989
+ * @returns {Promise<object>} Object with the value keyed by config field name
1990
+ * @private
1991
+ */
1992
+ async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
1993
+ const arnConfigKey = this._getArnConfigKey(classification);
1994
+ const plaintextConfigKey = this._getPlaintextConfigKey(classification);
1995
+
1996
+ // If in auto-prompt mode, skip
1997
+ if (this.configManager?.isAutoPrompt()) {
1998
+ return {};
1999
+ }
2000
+
2001
+ // Display context-appropriate security message
2002
+ if (classification.identifier === 'hf-token') {
2003
+ console.log('\n🔐 HuggingFace Authentication');
2004
+ console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
2005
+ console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
2006
+ console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
2007
+ console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
2008
+ } else if (classification.identifier === 'ngc-token') {
2009
+ console.log('\n🔐 NVIDIA NGC Authentication');
2010
+ console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
2011
+ console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
2012
+ console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
2013
+ console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
2014
+ } else {
2015
+ console.log(`\n🔐 ${classification.displayName}`);
2016
+ console.log(` ${classification.purpose}\n`);
2017
+ }
2018
+
2019
+ const { tokenValue } = await this._runPrompts([{
2020
+ type: 'input',
2021
+ name: 'tokenValue',
2022
+ message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
2023
+ validate: (input) => {
2024
+ if (!input || input.trim() === '') return true;
2025
+ if (input.trim().startsWith('$')) return true;
2026
+ // Warn about HF token format
2027
+ if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
2028
+ console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
2029
+ console.warn(' If this is intentional, you can ignore this warning.');
2030
+ }
2031
+ return true;
2032
+ }
2033
+ }]);
2034
+
2035
+ if (!tokenValue || tokenValue.trim() === '') {
2036
+ return {};
2037
+ }
2038
+
2039
+ const value = tokenValue.trim();
2040
+
2041
+ // ARN detection: if the value is a Secrets Manager ARN, store as ARN
2042
+ if (isSecretsManagerArn(value)) {
2043
+ return { [arnConfigKey]: value };
2044
+ }
2045
+
2046
+ // Otherwise store as plaintext
2047
+ return { [plaintextConfigKey]: value };
2048
+ }
2049
+
1557
2050
  /**
1558
2051
  * CUDA-to-AMI mapping.
1559
2052
  * Maps CUDA major.minor versions to the SageMaker inference AMI that provides
@@ -1561,13 +2054,14 @@ export default class PromptRunner {
1561
2054
  * @private
1562
2055
  */
1563
2056
  // Lookup table: keys are CUDA "major.minor" version strings, values are
  // SageMaker inference AMI names. In this diff view, lines marked "-" are the
  // 0.2.6 entries and lines marked "+" are their 0.4.0 replacements; unmarked
  // lines are unchanged. The 0.4.0 side also adds an entry for CUDA 13.0.
  static CUDA_AMI_MAP = {
1564
- '11.0': 'al2-ami-sagemaker-inference-gpu-2-1',
2057
+ '11.0': 'al2-ami-sagemaker-inference-gpu-2',
1565
2058
  '11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
1566
- '11.8': 'al2-ami-sagemaker-inference-gpu-3-1',
2059
+ '11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
1567
2060
  '12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
1568
- '12.2': 'al2-ami-sagemaker-inference-gpu-3-2',
1569
- '12.4': 'al2-ami-sagemaker-inference-gpu-3-2',
1570
- '12.6': 'al2-ami-sagemaker-inference-gpu-3-2'
2061
+ '12.2': 'al2-ami-sagemaker-inference-gpu-3-1',
2062
+ '12.4': 'al2-ami-sagemaker-inference-gpu-3-1',
2063
+ '12.6': 'al2-ami-sagemaker-inference-gpu-3-1',
2064
+ '13.0': 'al2023-ami-sagemaker-inference-gpu-4-1'
1571
2065
  };
1572
2066
 
1573
2067
  /**