@aws/ml-container-creator 0.9.0 → 0.10.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (48)
  1. package/bin/cli.js +31 -137
  2. package/config/parameter-schema-v2.json +2065 -0
  3. package/package.json +6 -3
  4. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  5. package/servers/lib/catalogs/models.json +182 -26
  6. package/src/app.js +6 -389
  7. package/src/lib/bootstrap-command-handler.js +75 -1078
  8. package/src/lib/bootstrap-profile-manager.js +634 -0
  9. package/src/lib/bootstrap-provisioners.js +421 -0
  10. package/src/lib/config-loader.js +405 -0
  11. package/src/lib/config-manager.js +59 -1668
  12. package/src/lib/config-mcp-client.js +118 -0
  13. package/src/lib/config-validator.js +634 -0
  14. package/src/lib/cuda-resolver.js +140 -0
  15. package/src/lib/e2e-catalog-validator.js +251 -3
  16. package/src/lib/e2e-ci-recorder.js +103 -0
  17. package/src/lib/generated/cli-options.js +471 -0
  18. package/src/lib/generated/parameter-matrix.js +671 -0
  19. package/src/lib/generated/validation-rules.js +202 -0
  20. package/src/lib/marketplace-flow.js +276 -0
  21. package/src/lib/mcp-query-runner.js +768 -0
  22. package/src/lib/parameter-schema-validator.js +62 -18
  23. package/src/lib/prompt-runner.js +41 -1504
  24. package/src/lib/prompts/feature-prompts.js +172 -0
  25. package/src/lib/prompts/index.js +48 -0
  26. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  27. package/src/lib/prompts/model-prompts.js +552 -0
  28. package/src/lib/prompts/project-prompts.js +70 -0
  29. package/src/lib/prompts.js +2 -1446
  30. package/src/lib/registry-command-handler.js +135 -3
  31. package/src/lib/secrets-prompt-runner.js +251 -0
  32. package/src/lib/template-variable-resolver.js +398 -0
  33. package/templates/code/serve +5 -134
  34. package/templates/code/serve.d/lmi.ejs +19 -0
  35. package/templates/code/serve.d/sglang.ejs +47 -0
  36. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  37. package/templates/code/serve.d/vllm.ejs +48 -0
  38. package/templates/do/clean +1 -1387
  39. package/templates/do/clean.d/async-inference.ejs +508 -0
  40. package/templates/do/clean.d/batch-transform.ejs +512 -0
  41. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  42. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  43. package/templates/do/deploy +1 -1766
  44. package/templates/do/deploy.d/async-inference.ejs +501 -0
  45. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  46. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  47. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  48. package/config/parameter-schema.json +0 -88
@@ -29,40 +29,25 @@ import {
29
29
  infraBuildPrompts,
30
30
  projectPrompts,
31
31
  destinationPrompts,
32
- baseImageSearchPrompts,
33
32
  baseImagePrompts,
34
- formatImageChoices,
35
33
  filterByCudaGeneration,
36
34
  instanceCatalogRaw
37
- } from './prompts.js';
35
+ } from './prompts/index.js';
38
36
 
39
37
  import fs from 'fs';
40
38
  import path from 'path';
41
- import { execSync } from 'node:child_process';
42
39
  import { fileURLToPath } from 'node:url';
43
40
  import RegistryLoader from './registry-loader.js';
44
41
  import { runPrompts } from '../prompt-adapter.js';
45
- import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
46
- import { isSecretsManagerArn } from './arn-detection.js';
47
- import BootstrapConfig from './bootstrap-config.js';
42
+ import McpQueryRunner from './mcp-query-runner.js';
43
+ import SecretsPromptRunner from './secrets-prompt-runner.js';
44
+ import CudaResolver from './cuda-resolver.js';
45
+ import MarketplaceFlow from './marketplace-flow.js';
48
46
 
49
47
  const __pr_filename = fileURLToPath(import.meta.url);
50
48
  const __pr_dirname = path.dirname(__pr_filename);
51
49
  const GENERATOR_ROOT = path.resolve(__pr_dirname, '..', '..');
52
50
 
53
- /**
54
- * Resolve MCP server args — converts relative paths to absolute using GENERATOR_ROOT.
55
- * @param {string[]} args - The args array from mcp.json serverConfig
56
- * @returns {string[]} Args with relative paths resolved
57
- */
58
- function resolveMcpArgs(args) {
59
- return (args || []).map(arg => {
60
- if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
61
- return path.resolve(GENERATOR_ROOT, arg);
62
- }
63
- return arg;
64
- });
65
- }
66
51
 
67
52
  export default class PromptRunner {
68
53
  constructor({ configManager, options, registryConfigManager, baseConfig, promptFn }) {
@@ -71,8 +56,32 @@ export default class PromptRunner {
71
56
  this.registryConfigManager = registryConfigManager || null;
72
57
  this.baseConfig = baseConfig || {};
73
58
  this._runPrompts = promptFn || runPrompts;
59
+ this.mcpQueryRunner = new McpQueryRunner(this);
60
+ this.secretsPromptRunner = new SecretsPromptRunner(this);
61
+ this.cudaResolver = new CudaResolver(this);
62
+ this.marketplaceFlow = new MarketplaceFlow(this);
74
63
  }
75
64
 
65
+ // ── Sub-object delegations (backward compat for tests) ──────────
66
+
67
+ _queryMcpForBaseImage(...args) { return this.mcpQueryRunner._queryMcpForBaseImage(...args); }
68
+ _queryMcpForModels(...args) { return this.mcpQueryRunner._queryMcpForModels(...args); }
69
+ _queryMcpForRegion(...args) { return this.mcpQueryRunner._queryMcpForRegion(...args); }
70
+ _queryMcpForInstance(...args) { return this.mcpQueryRunner._queryMcpForInstance(...args); }
71
+ _queryMcpForInstanceSizing(...args) { return this.mcpQueryRunner._queryMcpForInstanceSizing(...args); }
72
+ _queryMcpForEndpoints(...args) { return this.mcpQueryRunner._queryMcpForEndpoints(...args); }
73
+ _queryMcpForHyperPod(...args) { return this.mcpQueryRunner._queryMcpForHyperPod(...args); }
74
+ _fetchAndDisplayModelInfo(...args) { return this.mcpQueryRunner._fetchAndDisplayModelInfo(...args); }
75
+ _validateAndDisplayInstanceType(...args) { return this.mcpQueryRunner._validateAndDisplayInstanceType(...args); }
76
+ _runSecretPrompts(...args) { return this.secretsPromptRunner._runSecretPrompts(...args); }
77
+ _secretStagesApply(...args) { return this.secretsPromptRunner._secretStagesApply(...args); }
78
+ _getArnConfigKey(...args) { return this.secretsPromptRunner._getArnConfigKey(...args); }
79
+ _getPlaintextConfigKey(...args) { return this.secretsPromptRunner._getPlaintextConfigKey(...args); }
80
+ _promptSecretSelection(...args) { return this.secretsPromptRunner._promptSecretSelection(...args); }
81
+ _promptPlaintextEntry(...args) { return this.secretsPromptRunner._promptPlaintextEntry(...args); }
82
+ _promptPlaintextFallback(...args) { return this.secretsPromptRunner._promptPlaintextFallback(...args); }
83
+ _promptCudaVersion(...args) { return this.cudaResolver._promptCudaVersion(...args); }
84
+
76
85
  /**
77
86
  * Runs all prompting phases and returns combined answers
78
87
  *
@@ -131,7 +140,7 @@ export default class PromptRunner {
131
140
  // Requirements: 2.3, 2.4, 2.5
132
141
  // ──────────────────────────────────────────────────────────────────────
133
142
  if (frameworkAnswers.architecture === 'marketplace') {
134
- return this._runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp);
143
+ return this.marketplaceFlow._runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp);
135
144
  }
136
145
 
137
146
  // Engine prompt for http architecture
@@ -141,7 +150,7 @@ export default class PromptRunner {
141
150
  const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
142
151
 
143
152
  // Query model-picker MCP server for model choices
144
- this._queryMcpForModels(frameworkAnswers.architecture);
153
+ this.mcpQueryRunner._queryMcpForModels(frameworkAnswers.architecture);
145
154
  if (this._mcpModelChoices) {
146
155
  console.log(' 🔍 Querying model-picker...');
147
156
  console.log(` ✓ ${this._mcpModelChoices.length} model(s) available from catalog`);
@@ -171,7 +180,7 @@ export default class PromptRunner {
171
180
 
172
181
  // Fetch model information from HuggingFace and Model Registry
173
182
  if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
174
- await this._fetchAndDisplayModelInfo(phase1ModelId);
183
+ await this.mcpQueryRunner._fetchAndDisplayModelInfo(phase1ModelId);
175
184
  }
176
185
 
177
186
  // ══════════════════════════════════════════════════════════════════════
@@ -186,7 +195,7 @@ export default class PromptRunner {
186
195
  const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
187
196
 
188
197
  // 2b. Query base-image-picker MCP server for base image choices
189
- await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
198
+ await this.mcpQueryRunner._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
190
199
  const baseImagePreviousAnswers = {
191
200
  ...frameworkAnswers,
192
201
  ...engineAnswers,
@@ -211,7 +220,7 @@ export default class PromptRunner {
211
220
  // ══════════════════════════════════════════════════════════════════════
212
221
 
213
222
  // 3a. Region query
214
- await this._queryMcpForRegion(frameworkAnswers, explicitConfig);
223
+ await this.mcpQueryRunner._queryMcpForRegion(frameworkAnswers, explicitConfig);
215
224
 
216
225
  // 3a2. Existing endpoint prompt (only for realtime-inference)
217
226
  // Requirements: 3.3, 4.3, 4.4 — endpoint-picker MCP query
@@ -219,7 +228,7 @@ export default class PromptRunner {
219
228
  if (regionAndTargetAnswers.deploymentTarget === 'realtime-inference') {
220
229
  // Query endpoint-picker MCP server for available endpoints
221
230
  const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
222
- await this._queryMcpForEndpoints({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
231
+ await this.mcpQueryRunner._queryMcpForEndpoints({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
223
232
 
224
233
  const endpointPreviousAnswers = {
225
234
  ...regionAndTargetAnswers,
@@ -261,13 +270,13 @@ export default class PromptRunner {
261
270
  this._architectureHeuristicDefault = 'ml.m5.large';
262
271
  } else if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
263
272
  // Query instance-sizer with full context
264
- await this._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
273
+ await this.mcpQueryRunner._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
265
274
  cudaVersion: selectedBaseImageCuda,
266
275
  profileEnvVars: this._selectedProfileEnvVars || {}
267
276
  });
268
277
  } else {
269
278
  // No model known — use architecture heuristic
270
- await this._queryMcpForInstance(frameworkAnswers, explicitConfig);
279
+ await this.mcpQueryRunner._queryMcpForInstance(frameworkAnswers, explicitConfig);
271
280
  }
272
281
  }
273
282
 
@@ -419,7 +428,7 @@ export default class PromptRunner {
419
428
 
420
429
  // 3e. CUDA/AMI auto-resolution
421
430
  const instanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
422
- const cudaAnswer = await this._promptCudaVersion(
431
+ const cudaAnswer = await this.cudaResolver._promptCudaVersion(
423
432
  instanceType,
424
433
  frameworkAnswers.framework,
425
434
  null, // frameworkVersion not yet known in Phase 3
@@ -430,7 +439,7 @@ export default class PromptRunner {
430
439
  let hyperPodAnswers = {};
431
440
  if (regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
432
441
  const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
433
- await this._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
442
+ await this.mcpQueryRunner._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
434
443
  hyperPodAnswers = await this._runPhase(infraHyperPodPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
435
444
  }
436
445
 
@@ -507,7 +516,7 @@ export default class PromptRunner {
507
516
 
508
517
  // Secret prompts — registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
509
518
  const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
510
- const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
519
+ const secretAnswers = await this.secretsPromptRunner._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
511
520
  const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
512
521
  const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
513
522
 
@@ -548,7 +557,7 @@ export default class PromptRunner {
548
557
  // Validate instance type against framework requirements (now that framework version is known)
549
558
  const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
550
559
  if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
551
- await this._validateAndDisplayInstanceType(
560
+ await this.mcpQueryRunner._validateAndDisplayInstanceType(
552
561
  finalInstanceType,
553
562
  frameworkAnswers.framework,
554
563
  frameworkVersionAnswers.frameworkVersion
@@ -756,264 +765,6 @@ export default class PromptRunner {
756
765
  return combinedAnswers;
757
766
  }
758
767
 
759
- /**
760
- * Marketplace-specific prompt flow.
761
- * Skips all container-related prompts (framework, model server, base image, CUDA version)
762
- * and prompts only for: model package ARN, instance type, deployment target, region.
763
- *
764
- * Requirements: 2.3, 2.4, 2.5
765
- * @private
766
- */
767
- async _runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp) {
768
- console.log('\n🏪 Marketplace Model Package Configuration');
769
-
770
- // Query marketplace-picker MCP server for subscription discovery
771
- // Requirements: 2.4, 6.1, 6.2
772
- let mcpSubscriptions = [];
773
- const cm = this.configManager;
774
- if (cm && cm.getMcpServerNames && cm.getMcpServerNames().includes('marketplace-picker')) {
775
- try {
776
- console.log(' 🔍 Querying marketplace-picker for subscriptions...');
777
- const result = await cm.queryMcpServer('marketplace-picker', {
778
- region: explicitConfig.awsRegion || existingConfig.awsRegion || process.env.AWS_REGION || 'us-east-1'
779
- });
780
- if (result && result.metadata?.subscriptions?.length > 0) {
781
- mcpSubscriptions = result.metadata.subscriptions;
782
- console.log(` ✅ Found ${mcpSubscriptions.length} Marketplace subscription(s)`);
783
- } else {
784
- console.log(' ℹ️ No Marketplace subscriptions found — enter ARN manually');
785
- }
786
- } catch (err) {
787
- console.log(` ⚠️ marketplace-picker unavailable: ${err.message}`);
788
- console.log(' Falling back to manual ARN entry');
789
- }
790
- }
791
-
792
- // Marketplace-specific prompts: model package ARN
793
- const marketplacePrompts = [
794
- {
795
- type: mcpSubscriptions.length > 0 ? 'list' : 'input',
796
- name: 'modelPackageArn',
797
- message: mcpSubscriptions.length > 0
798
- ? 'Select a Marketplace model package:'
799
- : 'Model package ARN (arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>):',
800
- ...(mcpSubscriptions.length > 0 ? {
801
- choices: [
802
- ...mcpSubscriptions.map(sub => ({
803
- name: `${sub.modelName} (${sub.vendor}) — ${sub.arn}`,
804
- value: sub.arn,
805
- short: sub.modelName
806
- })),
807
- { type: 'separator', separator: '──────────────' },
808
- { name: 'Enter ARN manually...', value: '__manual__', short: 'manual' }
809
- ]
810
- } : {
811
- validate: (input) => {
812
- if (!input || input.trim() === '') {
813
- return 'Model package ARN is required';
814
- }
815
- const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[\w-]+\/\d+$/;
816
- if (!arnPattern.test(input.trim())) {
817
- return 'Invalid ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>';
818
- }
819
- return true;
820
- }
821
- })
822
- },
823
- {
824
- type: 'input',
825
- name: 'modelPackageArnManual',
826
- message: 'Model package ARN (arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>):',
827
- when: (answers) => answers.modelPackageArn === '__manual__',
828
- validate: (input) => {
829
- if (!input || input.trim() === '') {
830
- return 'Model package ARN is required';
831
- }
832
- const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[\w-]+\/\d+$/;
833
- if (!arnPattern.test(input.trim())) {
834
- return 'Invalid ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>';
835
- }
836
- return true;
837
- }
838
- }
839
- ];
840
- const marketplaceAnswers = await this._runPhase(marketplacePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
841
-
842
- // Handle manual ARN entry fallback
843
- if (marketplaceAnswers.modelPackageArn === '__manual__' && marketplaceAnswers.modelPackageArnManual) {
844
- marketplaceAnswers.modelPackageArn = marketplaceAnswers.modelPackageArnManual;
845
- delete marketplaceAnswers.modelPackageArnManual;
846
- }
847
-
848
- // Infrastructure prompts: region, deployment target, instance type
849
- console.log('\n💪 Infrastructure & Deployment');
850
- const bootstrapRegion = existingConfig.awsRegion || explicitConfig.awsRegion;
851
- const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
852
-
853
- // Marketplace deployment targets (no HyperPod — vendor controls the container)
854
- const marketplaceInfraPrompts = [
855
- {
856
- type: 'list',
857
- name: 'awsRegion',
858
- message: 'Target AWS region?',
859
- choices: (answers) => {
860
- const bootstrapReg = answers._bootstrapRegion;
861
- const choices = ['us-east-1'];
862
- if (bootstrapReg && bootstrapReg !== 'us-east-1') {
863
- choices.unshift({ name: `${bootstrapReg} (from bootstrap profile)`, value: bootstrapReg });
864
- }
865
- choices.push({ name: 'Custom...', value: 'custom' });
866
- return choices;
867
- },
868
- default: (answers) => answers._bootstrapRegion || 'us-east-1'
869
- },
870
- {
871
- type: 'input',
872
- name: 'customAwsRegion',
873
- message: 'Enter AWS region (e.g., us-west-2, eu-west-1):',
874
- when: answers => answers.awsRegion === 'custom'
875
- },
876
- {
877
- type: 'list',
878
- name: 'deploymentTarget',
879
- message: 'Deployment target?',
880
- choices: [
881
- { name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
882
- { name: 'SageMaker Async Inference', value: 'async-inference' },
883
- { name: 'SageMaker Batch Transform', value: 'batch-transform' }
884
- ],
885
- default: 'realtime-inference'
886
- },
887
- {
888
- type: 'list',
889
- name: 'instanceType',
890
- message: 'Instance type for deployment?',
891
- choices: [
892
- { name: 'ml.g5.xlarge (1 GPU, 24GB)', value: 'ml.g5.xlarge' },
893
- { name: 'ml.g5.2xlarge (1 GPU, 24GB)', value: 'ml.g5.2xlarge' },
894
- { name: 'ml.g5.4xlarge (1 GPU, 24GB)', value: 'ml.g5.4xlarge' },
895
- { name: 'ml.g5.12xlarge (4 GPUs, 96GB)', value: 'ml.g5.12xlarge' },
896
- { name: 'ml.p3.2xlarge (1 GPU, 16GB V100)', value: 'ml.p3.2xlarge' },
897
- { name: 'ml.m5.xlarge (CPU, 16GB)', value: 'ml.m5.xlarge' },
898
- { name: 'Custom...', value: 'custom' }
899
- ],
900
- default: 'ml.g5.xlarge'
901
- },
902
- {
903
- type: 'input',
904
- name: 'customInstanceType',
905
- message: 'Enter instance type (e.g., ml.g5.xlarge):',
906
- validate: (input) => {
907
- if (!input || input.trim() === '') {
908
- return 'Instance type is required';
909
- }
910
- if (!input.startsWith('ml.')) {
911
- return 'Instance type must start with "ml." (e.g., ml.g5.xlarge)';
912
- }
913
- return true;
914
- },
915
- when: answers => answers.instanceType === 'custom'
916
- }
917
- ];
918
- const infraAnswers = await this._runPhase(marketplaceInfraPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
919
-
920
- // Async-specific prompts (only when deploymentTarget === 'async-inference')
921
- let asyncAnswers = {};
922
- if (infraAnswers.deploymentTarget === 'async-inference') {
923
- asyncAnswers = await this._runPhase(infraAsyncPrompts, { ...infraAnswers }, explicitConfig, existingConfig);
924
- }
925
-
926
- // Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
927
- let batchTransformAnswers = {};
928
- if (infraAnswers.deploymentTarget === 'batch-transform') {
929
- batchTransformAnswers = await this._runPhase(
930
- infraBatchTransformPrompts,
931
- { ...infraAnswers },
932
- explicitConfig,
933
- existingConfig
934
- );
935
- }
936
-
937
- // Role ARN prompt (always needed for marketplace deploy)
938
- const rolePrompts = [
939
- {
940
- type: 'input',
941
- name: 'awsRoleArn',
942
- message: 'AWS IAM Role ARN for SageMaker execution (optional)?',
943
- validate: (input) => {
944
- if (!input || input.trim() === '') {
945
- return true;
946
- }
947
- const arnPattern = /^arn:aws:iam::\d{12}:role\/[\w+=,.@-]+$/;
948
- if (!arnPattern.test(input)) {
949
- return 'Invalid ARN format. Expected: arn:aws:iam::123456789012:role/RoleName';
950
- }
951
- return true;
952
- }
953
- }
954
- ];
955
- const roleAnswers = await this._runPhase(rolePrompts, { ...infraAnswers }, explicitConfig, existingConfig);
956
-
957
- // Project name + destination
958
- console.log('\n📋 Project Configuration');
959
- const allTechnicalAnswers = {
960
- ...frameworkAnswers,
961
- ...marketplaceAnswers,
962
- ...infraAnswers,
963
- ...asyncAnswers,
964
- ...batchTransformAnswers,
965
- ...roleAnswers
966
- };
967
- const projectAnswers = await this._runPhase(projectPrompts, allTechnicalAnswers, explicitConfig, existingConfig);
968
- const destinationAnswers = await this._runPhase(destinationPrompts,
969
- { ...allTechnicalAnswers, ...projectAnswers }, explicitConfig, existingConfig);
970
-
971
- // Combine all marketplace answers
972
- const combinedAnswers = {
973
- ...frameworkAnswers,
974
- ...marketplaceAnswers,
975
- ...infraAnswers,
976
- ...asyncAnswers,
977
- ...batchTransformAnswers,
978
- ...roleAnswers,
979
- ...projectAnswers,
980
- ...destinationAnswers,
981
- buildTimestamp
982
- };
983
-
984
- // Handle custom instance type
985
- if (combinedAnswers.customInstanceType) {
986
- combinedAnswers.instanceType = combinedAnswers.customInstanceType;
987
- delete combinedAnswers.customInstanceType;
988
- }
989
-
990
- // Handle custom AWS region
991
- if (combinedAnswers.customAwsRegion) {
992
- combinedAnswers.awsRegion = combinedAnswers.customAwsRegion;
993
- delete combinedAnswers.customAwsRegion;
994
- }
995
-
996
- // Map awsRoleArn to roleArn for templates
997
- if (combinedAnswers.awsRoleArn) {
998
- combinedAnswers.roleArn = combinedAnswers.awsRoleArn;
999
- delete combinedAnswers.awsRoleArn;
1000
- }
1001
-
1002
- // Ensure CLI-provided values are in combinedAnswers
1003
- if (explicitConfig.modelPackageArn && !combinedAnswers.modelPackageArn) {
1004
- combinedAnswers.modelPackageArn = explicitConfig.modelPackageArn;
1005
- }
1006
-
1007
- // Handle marketplace:// prefix from --model-name CLI option
1008
- const modelName = explicitConfig.modelName || combinedAnswers.modelName;
1009
- if (modelName && modelName.startsWith('marketplace://')) {
1010
- const arn = modelName.replace(/^marketplace:\/\//, '');
1011
- combinedAnswers.modelPackageArn = arn;
1012
- delete combinedAnswers.modelName;
1013
- }
1014
-
1015
- return combinedAnswers;
1016
- }
1017
768
 
1018
769
  /**
1019
770
  * Checks if a parameter is promptable according to the parameter matrix
@@ -1267,491 +1018,6 @@ export default class PromptRunner {
1267
1018
  return Object.hasOwn(HEURISTIC_DEFAULTS, architecture) ? HEURISTIC_DEFAULTS[architecture] : 'ml.g5.xlarge';
1268
1019
  }
1269
1020
 
1270
- /**
1271
- * Query MCP region-picker server before infrastructure prompts.
1272
- * Populates configManager.mcpChoices so _runPhase injects them into list prompts.
1273
- * @private
1274
- */
1275
- async _queryMcpForRegion(frameworkAnswers, explicitConfig) {
1276
- const cm = this.configManager;
1277
- if (!cm) return;
1278
-
1279
- const mcpServers = cm.getMcpServerNames();
1280
- if (mcpServers.length === 0) return;
1281
-
1282
- const smart = this.options.smart === true;
1283
-
1284
- // Region: skip MCP query if region was explicitly provided via CLI, config file, or bootstrap profile
1285
- const cliRegion = this.options.region;
1286
- const bootstrapRegion = explicitConfig.awsRegion;
1287
- const skipRegionQuery = (cliRegion !== undefined && cliRegion !== null) ||
1288
- (bootstrapRegion !== undefined && bootstrapRegion !== null);
1289
-
1290
- if (!skipRegionQuery && mcpServers.includes('region-picker')) {
1291
- const { regionSearch } = await this._runPrompts([{
1292
- type: 'input',
1293
- name: 'regionSearch',
1294
- message: '🔌 Search for a region (e.g. "europe", "us west", "tokyo"):',
1295
- default: ''
1296
- }]);
1297
-
1298
- if (regionSearch && regionSearch.trim()) {
1299
- console.log(` 🔍 Querying region-picker${smart ? ' [smart]' : ''}...`);
1300
- const result = await cm.queryMcpServer('region-picker', {
1301
- ...frameworkAnswers,
1302
- regionSearch: regionSearch.trim()
1303
- });
1304
- if (result && result.choices?.awsRegion?.length > 0) {
1305
- const choices = result.choices.awsRegion;
1306
- const preview = choices.length <= 5
1307
- ? choices.join(', ')
1308
- : `${choices.slice(0, 5).join(', ') } (+${choices.length - 5} more)`;
1309
- console.log(` ✓ ${choices.length} region(s): [${preview}]`);
1310
- } else {
1311
- console.log(' ↳ No MCP results, using static list');
1312
- }
1313
- }
1314
- }
1315
- }
1316
-
1317
- /**
1318
- * Query MCP instance-sizer server with tag-based search after deployment target is known.
1319
- * Used when no model name is available for VRAM-based sizing.
1320
- * Populates configManager.mcpChoices so _runPhase injects them into list prompts.
1321
- * @private
1322
- */
1323
- async _queryMcpForInstance(frameworkAnswers, explicitConfig) {
1324
- const cm = this.configManager;
1325
- if (!cm) return;
1326
-
1327
- const mcpServers = cm.getMcpServerNames();
1328
- if (mcpServers.length === 0) return;
1329
-
1330
- const smart = this.options.smart === true;
1331
-
1332
- // Instance type: query if not already provided via CLI/config
1333
- if (!explicitConfig.instanceType && mcpServers.includes('instance-sizer')) {
1334
- const { instanceSearch } = await this._runPrompts([{
1335
- type: 'input',
1336
- name: 'instanceSearch',
1337
- message: '🔌 Describe your instance needs (e.g. "multi-gpu", "cost-effective cpu"):',
1338
- default: frameworkAnswers.framework || ''
1339
- }]);
1340
-
1341
- if (instanceSearch && instanceSearch.trim()) {
1342
- console.log(` 🔍 Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
1343
- const result = await cm.queryMcpServer('instance-sizer', {
1344
- ...frameworkAnswers,
1345
- instanceSearch: instanceSearch.trim()
1346
- });
1347
- if (result && result.choices?.instanceType?.length > 0) {
1348
- const choices = result.choices.instanceType;
1349
- const preview = choices.length <= 5
1350
- ? choices.join(', ')
1351
- : `${choices.slice(0, 5).join(', ') } (+${choices.length - 5} more)`;
1352
- console.log(` ✓ ${choices.length} instance(s): [${preview}]`);
1353
- } else {
1354
- console.log(' ↳ No MCP results, using static list');
1355
- }
1356
- }
1357
- }
1358
- }
1359
-
1360
- /**
1361
- * Query the instance-sizer MCP server after model is known.
1362
- * Estimates VRAM requirements and returns filtered, ranked instance recommendations.
1363
- * Stores results in this._mcpInstanceSizerChoices and this._instanceSizerMetadata.
1364
- * Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
1365
- * @param {object} frameworkAnswers - Framework/architecture answers
1366
- * @param {object} modelFormatAnswers - Model format answers (contains modelName)
1367
- * @param {object} explicitConfig - Explicit CLI/config values
1368
- * @param {object} [sizerContext={}] - Additional context for the sizer query
1369
- * @param {string} [sizerContext.cudaVersion] - CUDA version from base image
1370
- * @param {object} [sizerContext.profileEnvVars] - Profile ENV overrides
1371
- * @private
1372
- */
1373
- async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
1374
- const cm = this.configManager;
1375
- if (!cm) return;
1376
-
1377
- const mcpServers = cm.getMcpServerNames();
1378
- if (!mcpServers.includes('instance-sizer')) return;
1379
-
1380
- // Resolve model name from answers or explicit config
1381
- const modelName = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
1382
- if (!modelName || modelName === 'Custom (enter manually)') return;
1383
-
1384
- const smart = this.options.smart === true;
1385
- const discover = this.options.discover !== false;
1386
-
1387
- const modeLabel = [smart && '[smart]', !discover && '[no-discover]'].filter(Boolean).join(' ');
1388
- console.log(` 🔍 Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);
1389
-
1390
- try {
1391
- const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
1392
- if (!fs.existsSync(mcpConfigPath)) return;
1393
-
1394
- const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
1395
- const serverConfig = mcpConfig.mcpServers?.['instance-sizer'];
1396
- if (!serverConfig) return;
1397
-
1398
- const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
1399
- const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
1400
-
1401
- const serverArgs = [...resolveMcpArgs(serverConfig.args)];
1402
- if (!discover && !serverArgs.includes('--no-discover')) {
1403
- serverArgs.push('--no-discover');
1404
- }
1405
-
1406
- const transport = new StdioClientTransport({
1407
- command: serverConfig.command,
1408
- args: serverArgs,
1409
- env: {
1410
- ...process.env,
1411
- ...(serverConfig.env || {}),
1412
- ...(smart ? { BEDROCK_SMART: 'true' } : {})
1413
- },
1414
- stderr: 'pipe'
1415
- });
1416
-
1417
- const mcpClient = new Client(
1418
- { name: 'ml-container-creator', version: '1.0.0' },
1419
- { capabilities: {} }
1420
- );
1421
-
1422
- await mcpClient.connect(transport);
1423
-
1424
- const toolArgs = {
1425
- modelName,
1426
- limit: 10,
1427
- context: {
1428
- architecture: frameworkAnswers.architecture || undefined,
1429
- backend: frameworkAnswers.backend || undefined,
1430
- deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
1431
- profileEnvVars: sizerContext.profileEnvVars || undefined
1432
- }
1433
- };
1434
-
1435
- // Add CUDA version from base image for filtering
1436
- if (sizerContext.cudaVersion) {
1437
- toolArgs.cudaVersion = sizerContext.cudaVersion;
1438
- }
1439
-
1440
- // Add quantization if available from model format answers
1441
- if (modelFormatAnswers.quantization) {
1442
- toolArgs.quantization = modelFormatAnswers.quantization;
1443
- }
1444
-
1445
- const result = await mcpClient.callTool({
1446
- name: 'get_instance_recommendation',
1447
- arguments: toolArgs
1448
- });
1449
-
1450
- await mcpClient.close();
1451
-
1452
- // Parse the response
1453
- const textBlock = result?.content?.find(b => b.type === 'text');
1454
- if (textBlock) {
1455
- const parsed = JSON.parse(textBlock.text);
1456
-
1457
- if (parsed.choices?.instanceType?.length > 0) {
1458
- this._instanceSizerMetadata = parsed.metadata || null;
1459
-
1460
- // Build display labels with VRAM estimate and utilization percentage
1461
- const recommendations = parsed.metadata?.recommendations || [];
1462
- const estimatedVramGb = parsed.metadata?.estimatedVramGb;
1463
-
1464
- // Store choices with display labels for the instance prompt
1465
- this._mcpInstanceSizerChoices = parsed.choices.instanceType;
1466
- this._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
1467
- name: rec.displayLabel || `${rec.instanceType} (${estimatedVramGb ? estimatedVramGb.toFixed(1) : '?'}GB / ${rec.totalVramGb || '?'}GB — ${rec.utilizationPercent || '?'}% utilization)`,
1468
- value: rec.instanceType,
1469
- short: rec.instanceType
1470
- }));
1471
-
1472
- const choices = parsed.choices.instanceType;
1473
- const topRec = recommendations[0];
1474
- const vramInfo = estimatedVramGb
1475
- ? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
1476
- : '';
1477
-
1478
- console.log(` ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
1479
-
1480
- // Warn if all instances had zero quota but were restored for visibility
1481
- if (parsed.metadata?.allFilteredByQuota) {
1482
- console.log(' ⚠️ All instances have zero quota — request a quota increase for your preferred type');
1483
- }
1484
-
1485
- // Check if availability data is present (recommendations have capacityType)
1486
- const hasAvailabilityData = recommendations.some(r => r.capacityType);
1487
-
1488
- if (hasAvailabilityData) {
1489
- // Group by capacityType for display
1490
- const reserved = recommendations.filter(r => r.capacityType === 'reserved' || r.capacityType === 'ftp');
1491
- const onDemand = recommendations.filter(r => r.capacityType === 'on-demand');
1492
-
1493
- if (reserved.length > 0) {
1494
- console.log(' ── Reserved Capacity ──');
1495
- for (const rec of reserved) {
1496
- const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1497
- const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1498
- const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1499
- const tag = rec.capacityType === 'reserved'
1500
- ? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
1501
- : ` [FTP] ${rec.ftpInfo?.planName || ''}`;
1502
- console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
1503
- }
1504
- }
1505
-
1506
- if (onDemand.length > 0) {
1507
- console.log(' ── On-Demand ──');
1508
- for (const rec of onDemand) {
1509
- const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1510
- const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1511
- const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1512
- const deployed = rec.quotaDeployed;
1513
- const quota = rec.quotaLimit;
1514
- const tag = quota !== null && quota !== undefined ? ` [Q:${deployed ?? 0}/${quota}]` : '';
1515
- console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
1516
- }
1517
- }
1518
- } else {
1519
- // Fallback: display compact recommendation table (no availability data)
1520
- for (const rec of recommendations) {
1521
- const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
1522
- const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
1523
- const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
1524
- console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
1525
- }
1526
- }
1527
- } else if (parsed.metadata?.allFilteredByQuota) {
1528
- // All VRAM-compatible instances had zero quota
1529
- console.log(' ⚠️ No quota available for compatible instances. Request a quota increase.');
1530
- this._instanceSizerMetadata = parsed.metadata || null;
1531
- } else if (parsed.metadata?.warning) {
1532
- console.log(` ⚠️ ${parsed.metadata.warning}`);
1533
- } else {
1534
- // Apply architecture heuristic fallback when sizer returns empty
1535
- const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
1536
- this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
1537
- console.log(` ↳ No instance-sizer results, using heuristic default: ${this._architectureHeuristicDefault}`);
1538
- }
1539
- }
1540
- } catch (err) {
1541
- // Sizer unavailable — apply architecture heuristic fallback
1542
- const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
1543
- this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
1544
- console.log(` ⚠️ instance-sizer: ${err.message}`);
1545
- console.log(` ↳ Using heuristic default: ${this._architectureHeuristicDefault}`);
1546
- }
1547
- }
1548
-
1549
- /**
1550
- * Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
1551
- * Populates configManager.mcpChoices.hyperPodCluster so _runPhase injects them into the list prompt.
1552
- * Falls back to manual entry if the MCP server is not configured or fails.
1553
- * Requirements: 12.1, 12.2, 12.3
1554
- * @private
1555
- */
1556
- async _queryMcpForHyperPod(infraAnswers, explicitConfig) {
1557
- const cm = this.configManager;
1558
- if (!cm) return;
1559
-
1560
- const mcpServers = cm.getMcpServerNames();
1561
- if (!mcpServers.includes('hyperpod-cluster-picker')) return;
1562
-
1563
- // Skip if cluster already provided via CLI/config
1564
- if (explicitConfig.hyperPodCluster) return;
1565
-
1566
- const smart = this.options.smart === true;
1567
- console.log(` 🔍 Querying hyperpod-cluster-picker${smart ? ' [smart]' : ''}...`);
1568
-
1569
- const result = await cm.queryMcpServer('hyperpod-cluster-picker', {
1570
- ...infraAnswers
1571
- });
1572
-
1573
- if (result && result.choices?.hyperPodCluster?.length > 0) {
1574
- const choices = result.choices.hyperPodCluster;
1575
- const preview = choices.length <= 5
1576
- ? choices.join(', ')
1577
- : `${choices.slice(0, 5).join(', ')} (+${choices.length - 5} more)`;
1578
- console.log(` ✓ ${choices.length} cluster(s): [${preview}]`);
1579
- } else {
1580
- // Surface any error message from the MCP server
1581
- if (result?.message) {
1582
- console.log(` ⚠️ ${result.message}`);
1583
- } else {
1584
- console.log(' ↳ No HyperPod clusters found via MCP, manual entry available');
1585
- }
1586
- }
1587
- }
1588
-
1589
- /**
1590
- * Query the endpoint-picker MCP server for available InService real-time endpoints.
1591
- * Populates this._mcpEndpointChoices for the existing endpoint selection prompt.
1592
- * Graceful fallback: if MCP server fails (no credentials, timeout), skip and create new endpoint.
1593
- * Requirements: 3.3, 4.3, 4.4
1594
- * @private
1595
- */
1596
- async _queryMcpForEndpoints(infraAnswers, explicitConfig) {
1597
- const cm = this.configManager;
1598
- if (!cm) return;
1599
-
1600
- const mcpServers = cm.getMcpServerNames();
1601
- if (!mcpServers.includes('endpoint-picker')) return;
1602
-
1603
- // Skip if existing endpoint already provided via CLI/config
1604
- if (explicitConfig.existingEndpointName) return;
1605
-
1606
- console.log(' 🔍 Querying endpoint-picker...');
1607
-
1608
- try {
1609
- const result = await cm.queryMcpServer('endpoint-picker', {
1610
- awsRegion: infraAnswers.awsRegion,
1611
- deploymentTarget: 'realtime-inference'
1612
- });
1613
-
1614
- if (result && result.choices?.endpointName?.length > 0) {
1615
- const endpointNames = result.choices.endpointName;
1616
- const metadata = result.metadata || {};
1617
-
1618
- // Build choices with metadata annotations
1619
- this._mcpEndpointChoices = endpointNames.map(name => {
1620
- const meta = metadata[name];
1621
- if (meta) {
1622
- const gpuInfo = meta.availableGpus === '?' ? 'GPUs: ?' : `${meta.availableGpus} GPUs free`;
1623
- return {
1624
- name: `${name} (${meta.instanceType}, ${gpuInfo}, ${meta.icCount} IC${meta.icCount !== 1 ? 's' : ''})`,
1625
- value: name
1626
- };
1627
- }
1628
- return { name, value: name };
1629
- });
1630
-
1631
- console.log(` ✓ ${endpointNames.length} endpoint(s) with available capacity`);
1632
- } else {
1633
- if (result?.message) {
1634
- console.log(` ↳ ${result.message}`);
1635
- } else {
1636
- console.log(' ↳ No endpoints with available capacity found');
1637
- }
1638
- }
1639
- } catch (err) {
1640
- // Graceful fallback: if MCP server fails, skip and create new endpoint
1641
- console.log(` ⚠️ endpoint-picker: ${err.message || 'query failed'} — will create new endpoint`);
1642
- }
1643
- }
1644
-
1645
- /**
1646
- * Query MCP base-image-picker server after deployment config is selected.
1647
- * Populates _mcpBaseImageChoices for the base image selection prompt.
1648
- * Requirements: 5.1, 5.2, 5.3, 5.4, 9.1, 9.2, 9.3
1649
- * @private
1650
- */
1651
- async _queryMcpForBaseImage(frameworkAnswers, _explicitConfig) {
1652
- // Skip if base image provided via CLI --base-image flag
1653
- if (this.options['base-image']) return;
1654
-
1655
- const cm = this.configManager;
1656
- if (!cm) return;
1657
-
1658
- const mcpServers = cm.getMcpServerNames();
1659
- if (!mcpServers.includes('base-image-picker')) return;
1660
-
1661
- const smart = this.options.smart === true;
1662
- const discover = this.options.discover !== false;
1663
- const framework = frameworkAnswers.framework;
1664
- const modelServer = frameworkAnswers.modelServer;
1665
- const architecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
1666
- const isTransformer = framework === 'transformers';
1667
- const isTriton = architecture === 'triton';
1668
- const isDiffusors = architecture === 'diffusors';
1669
-
1670
- // For non-transformer, non-triton, non-diffusors frameworks, prompt for optional search criteria
1671
- let searchCriteria;
1672
- if (!isTransformer && !isTriton && !isDiffusors) {
1673
- const searchAnswer = await this._runPrompts(baseImageSearchPrompts.map(p => ({
1674
- ...p,
1675
- when: () => true // Always show for non-transformer since we already checked
1676
- })));
1677
- searchCriteria = searchAnswer.baseImageSearch;
1678
- }
1679
-
1680
- const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
1681
- console.log(` 🔍 Querying base-image-picker${modeLabel ? ` ${modeLabel}` : ''}...`);
1682
-
1683
- const context = { framework, modelServer, architecture };
1684
- if (searchCriteria && searchCriteria.trim()) {
1685
- context.searchCriteria = searchCriteria.trim();
1686
- }
1687
-
1688
- const result = await cm.queryMcpServer('base-image-picker', context);
1689
-
1690
- if (result && result.metadata?.baseImage?.length > 0) {
1691
- const entries = result.metadata.baseImage;
1692
- this._mcpBaseImageChoices = formatImageChoices(entries, isTransformer || isTriton || isDiffusors);
1693
- const count = entries.length;
1694
- console.log(` ✓ ${count} base image(s) available`);
1695
- } else {
1696
- console.log(' ↳ No MCP results, using default image');
1697
- }
1698
- }
1699
-
1700
- /**
1701
- * Query model-picker MCP server catalog for model choices.
1702
- * Reads the architecture-specific catalog (popular-transformers.json or
1703
- * popular-diffusors.json) to populate the model selection prompt.
1704
- * @param {string} [architecture] - Current architecture ('transformers', 'diffusors', etc.)
1705
- * @private
1706
- */
1707
- _queryMcpForModels(architecture) {
1708
- const cm = this.configManager;
1709
- if (!cm) return;
1710
-
1711
- const mcpServers = cm.getMcpServerNames();
1712
- if (!mcpServers.includes('model-picker')) return;
1713
-
1714
- try {
1715
- const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
1716
- if (!fs.existsSync(mcpConfigPath)) return;
1717
-
1718
- const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
1719
- const serverConfig = mcpConfig.mcpServers?.['model-picker'];
1720
- if (!serverConfig?.args?.length) return;
1721
-
1722
- // Resolve the server entry point directory from the args
1723
- const serverEntryPoint = serverConfig.args[serverConfig.args.length - 1];
1724
- const serverDir = path.dirname(serverEntryPoint);
1725
-
1726
- // Read manifest to find catalog path
1727
- const manifestPath = path.join(serverDir, 'manifest.json');
1728
- if (!fs.existsSync(manifestPath)) return;
1729
-
1730
- const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
1731
-
1732
- // Select catalog based on architecture
1733
- const catalogKey = architecture === 'diffusors'
1734
- ? 'popular-diffusors'
1735
- : 'popular-transformers';
1736
- const catalogRelPath = manifest.catalogs?.[catalogKey];
1737
- if (!catalogRelPath) return;
1738
-
1739
- const catalogPath = path.resolve(serverDir, catalogRelPath);
1740
- if (!fs.existsSync(catalogPath)) return;
1741
-
1742
- const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
1743
-
1744
- // Extract model IDs, filtering out glob patterns (entries with *)
1745
- const modelIds = Object.keys(catalog).filter(id => !id.includes('*'));
1746
-
1747
- if (modelIds.length > 0) {
1748
- this._mcpModelChoices = modelIds;
1749
- }
1750
- } catch {
1751
- // Silently fall back to hardcoded defaults
1752
- }
1753
- }
1754
-
1755
1021
  /**
1756
1022
  * Get framework version choices from registry
1757
1023
  * Requirements: 2.1, 2.6, 8.2, 8.3
@@ -1916,735 +1182,6 @@ export default class PromptRunner {
1916
1182
  return choices;
1917
1183
  }
1918
1184
 
1919
- /**
1920
- * Fetch and display model information from HuggingFace API and Model Registry
1921
- * Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
1922
- * @private
1923
- */
1924
- async _fetchAndDisplayModelInfo(modelId) {
1925
- console.log('\n 🔍 Querying model-picker [discover]...');
1926
-
1927
- const sources = [];
1928
- let chatTemplate = null;
1929
- let modelFamily = null;
1930
- let mcpUsed = false;
1931
-
1932
- // Try model-picker MCP server in discover mode (queries HuggingFace + merges with catalog)
1933
- const cm = this.configManager;
1934
- if (cm) {
1935
- const mcpServers = cm.getMcpServerNames();
1936
- if (mcpServers.includes('model-picker')) {
1937
- try {
1938
- const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
1939
- if (fs.existsSync(mcpConfigPath)) {
1940
- const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
1941
- const serverConfig = mcpConfig.mcpServers?.['model-picker'];
1942
- if (serverConfig) {
1943
- const { McpClient } = await import('./mcp-client.js');
1944
- const client = new McpClient(serverConfig, { timeout: 15000 });
1945
-
1946
- // Override _buildContext to pass model_id and mode directly
1947
- client._getUnboundedParameterNames = () => [];
1948
- client._buildContext = () => ({});
1949
-
1950
- // Connect and call get_models directly
1951
- const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
1952
- const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
1953
-
1954
- const transport = new StdioClientTransport({
1955
- command: serverConfig.command,
1956
- args: resolveMcpArgs(serverConfig.args),
1957
- env: { ...process.env, ...(serverConfig.env || {}) },
1958
- stderr: 'pipe'
1959
- });
1960
-
1961
- const mcpClient = new Client(
1962
- { name: 'ml-container-creator', version: '1.0.0' },
1963
- { capabilities: {} }
1964
- );
1965
-
1966
- await mcpClient.connect(transport);
1967
-
1968
- const result = await mcpClient.callTool({
1969
- name: 'get_models',
1970
- arguments: { model_id: modelId, mode: 'discover' }
1971
- });
1972
-
1973
- await mcpClient.close();
1974
-
1975
- // Parse the response
1976
- const textBlock = result?.content?.find(b => b.type === 'text');
1977
- if (textBlock) {
1978
- const parsed = JSON.parse(textBlock.text);
1979
- if (parsed.values && Object.keys(parsed.values).length > 0) {
1980
- mcpUsed = true;
1981
- const vals = parsed.values;
1982
-
1983
- if (vals.chat_template) {
1984
- chatTemplate = vals.chat_template;
1985
- }
1986
- if (vals.family) {
1987
- modelFamily = vals.family;
1988
- }
1989
-
1990
- // Extract model_type for architecture validation
1991
- // Requirements: 4.1
1992
- if (vals.model_type) {
1993
- this._modelType = vals.model_type;
1994
- }
1995
-
1996
- // Extract model source metadata for loading adapter
1997
- // Requirements: 2.1, 2.2, 2.3, 2.4
1998
- if (vals.provider) {
1999
- this._mcpModelSource = vals.provider;
2000
- }
2001
- if (vals.artifactUri) {
2002
- this._mcpArtifactUri = vals.artifactUri;
2003
- }
2004
-
2005
- // Determine sources based on what was returned
2006
- if (vals.tags || vals.pipeline_tag) {
2007
- sources.push('HuggingFace_Hub_API');
2008
- }
2009
- if (vals.validation_level || vals.framework_compatibility) {
2010
- sources.push('Model_Picker_Catalog');
2011
- }
2012
- if (sources.length === 0) {
2013
- sources.push('model-picker');
2014
- }
2015
- console.log(` ✓ Resolved: ${modelId}`);
2016
- } else if (parsed.message) {
2017
- console.log(` ↳ ${parsed.message}`);
2018
- }
2019
- }
2020
- }
2021
- }
2022
- } catch (err) {
2023
- console.log(' ↳ model-picker unavailable, using fallback');
2024
- }
2025
- }
2026
- }
2027
-
2028
- // Fallback to legacy path if MCP didn't resolve
2029
- if (!mcpUsed) {
2030
- const registryConfigManager = this.registryConfigManager;
2031
- if (registryConfigManager) {
2032
- // Only try HuggingFace API for bare model IDs (not prefixed URIs)
2033
- const isNonHfUri = modelId.startsWith('s3://') ||
2034
- modelId.startsWith('registry://');
2035
-
2036
- if (!isNonHfUri) {
2037
- // Try HuggingFace API directly
2038
- try {
2039
- const hfData = await registryConfigManager._fetchHuggingFaceData(modelId);
2040
- if (hfData) {
2041
- sources.push('HuggingFace_Hub_API');
2042
- if (hfData.chatTemplate) {
2043
- chatTemplate = hfData.chatTemplate;
2044
- }
2045
- // Extract model_type for architecture validation
2046
- // Requirements: 4.1
2047
- if (hfData.modelConfig?.model_type) {
2048
- this._modelType = hfData.modelConfig.model_type;
2049
- }
2050
- console.log(' ✅ Found on HuggingFace Hub');
2051
- } else {
2052
- console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
2053
- }
2054
- } catch (error) {
2055
- console.log(' ⚠️ HuggingFace API unavailable');
2056
- }
2057
- } else {
2058
- // Non-HF URI (s3://, registry://, etc.) — skip HF lookup silently
2059
- // The summary at the end of this function will report "No additional model information"
2060
- }
2061
-
2062
- // Check Model Registry for overrides
2063
- if (registryConfigManager.modelRegistry) {
2064
- let modelConfig = registryConfigManager.modelRegistry[modelId];
2065
-
2066
- if (!modelConfig) {
2067
- for (const [pattern, config] of Object.entries(registryConfigManager.modelRegistry)) {
2068
- if (pattern.includes('*')) {
2069
- const regex = new RegExp(`^${ pattern.replace(/\*/g, '.*') }$`);
2070
- if (regex.test(modelId)) {
2071
- modelConfig = config;
2072
- console.log(` ✅ Matched pattern in Model_Registry: ${pattern}`);
2073
- break;
2074
- }
2075
- }
2076
- }
2077
- } else {
2078
- console.log(' ✅ Found in Model_Registry');
2079
- }
2080
-
2081
- if (modelConfig) {
2082
- sources.push('Model_Registry');
2083
- if (modelConfig.chatTemplate) {
2084
- chatTemplate = modelConfig.chatTemplate;
2085
- }
2086
- if (modelConfig.family) {
2087
- modelFamily = modelConfig.family;
2088
- }
2089
- }
2090
- }
2091
- }
2092
- }
2093
-
2094
- // Display information
2095
- if (sources.length > 0) {
2096
- console.log('\n📋 Model Information:');
2097
- console.log(` • Model ID: ${modelId}`);
2098
- if (modelFamily) {
2099
- console.log(` • Family: ${modelFamily}`);
2100
- }
2101
- if (chatTemplate) {
2102
- console.log(' • Chat Template: ✅ Available');
2103
- console.log(' (Will be injected into generated files)');
2104
- } else {
2105
- console.log(' • Chat Template: ❌ Not available');
2106
- console.log(' (Chat endpoints may require manual configuration)');
2107
- }
2108
- console.log(` • Sources: ${sources.join(', ')}`);
2109
- } else {
2110
- console.log(' ℹ️ No additional model information available');
2111
- console.log(' Proceeding with default configuration');
2112
- }
2113
- }
2114
-
2115
-
2116
-
2117
- /**
2118
- * Validate and display instance type compatibility
2119
- * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
2120
- * @private
2121
- */
2122
- async _validateAndDisplayInstanceType(instanceType, framework, version) {
2123
- const registryConfigManager = this.registryConfigManager;
2124
-
2125
- if (!registryConfigManager) {
2126
- return;
2127
- }
2128
-
2129
- // Get framework configuration
2130
- const frameworkConfig = registryConfigManager.frameworkRegistry?.[framework]?.[version];
2131
- if (!frameworkConfig) {
2132
- return; // No framework config, skip validation
2133
- }
2134
-
2135
- console.log(`\n🔍 Validating instance type: ${instanceType}`);
2136
-
2137
- // Validate instance type
2138
- const validationResult = registryConfigManager.validateInstanceType(instanceType, frameworkConfig);
2139
-
2140
- if (validationResult.compatible) {
2141
- console.log(' ✅ Instance type is compatible');
2142
- if (validationResult.info) {
2143
- console.log(` ℹ️ ${validationResult.info}`);
2144
- }
2145
- } else {
2146
- console.log(' ❌ Instance type compatibility issue detected');
2147
- if (validationResult.error) {
2148
- console.log(` Error: ${validationResult.error}`);
2149
- }
2150
- if (validationResult.recommendations && validationResult.recommendations.length > 0) {
2151
- console.log(` 💡 Recommended instances: ${validationResult.recommendations.join(', ')}`);
2152
- }
2153
-
2154
- // In test mode or non-interactive mode, throw error instead of prompting
2155
- if (this.options.skipPrompts || process.env.NODE_ENV === 'test') {
2156
- throw new Error('Instance type validation failed. Please select a compatible instance type.');
2157
- }
2158
-
2159
- // Ask user if they want to proceed
2160
- const proceed = await this._runPrompts([{
2161
- type: 'confirm',
2162
- name: 'proceedWithIncompatible',
2163
- message: 'Instance type may not be compatible. Proceed anyway?',
2164
- default: false
2165
- }]);
2166
-
2167
- if (!proceed.proceedWithIncompatible) {
2168
- throw new Error('Instance type validation failed. Please select a compatible instance type.');
2169
- }
2170
- }
2171
-
2172
- if (validationResult.warning) {
2173
- console.log(` ⚠️ Warning: ${validationResult.warning}`);
2174
- }
2175
- }
2176
-
2177
- /**
2178
- * Run secret prompts using the Secret_Classification registry.
2179
- * For each secret type whose stages apply to the current context:
2180
- * - Query for managed secrets of that type
2181
- * - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
2182
- * - If no managed secrets exist: fall back to existing plaintext prompt
2183
- *
2184
- * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
2185
- * @param {object} previousAnswers - Answers from previous prompt phases
2186
- * @param {object} explicitConfig - Explicit CLI/config values
2187
- * @param {object} existingConfig - Existing project configuration
2188
- * @returns {Promise<object>} Object with token/ARN values keyed by config field names
2189
- * @private
2190
- */
2191
- async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
2192
- const results = {};
2193
-
2194
- for (const classification of SECRET_CLASSIFICATIONS) {
2195
- // Check if this secret type's stages apply to the current context
2196
- if (!this._secretStagesApply(classification, previousAnswers)) continue;
2197
-
2198
- // Determine the config keys for this classification
2199
- const arnConfigKey = this._getArnConfigKey(classification);
2200
- const plaintextConfigKey = this._getPlaintextConfigKey(classification);
2201
-
2202
- // Skip if ARN already provided via CLI flag
2203
- if (explicitConfig[arnConfigKey]) {
2204
- results[arnConfigKey] = explicitConfig[arnConfigKey];
2205
- continue;
2206
- }
2207
-
2208
- // Skip if plaintext already provided via CLI flag
2209
- if (explicitConfig[plaintextConfigKey]) {
2210
- results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
2211
- continue;
2212
- }
2213
-
2214
- // Query for existing managed secrets of this type
2215
- const managedSecrets = await this._listManagedSecrets(classification.identifier);
2216
-
2217
- if (managedSecrets.length > 0) {
2218
- // Show selection list: managed secrets + plaintext entry + skip
2219
- const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
2220
- Object.assign(results, answer);
2221
- } else {
2222
- // Fall back to existing plaintext prompt
2223
- const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
2224
- Object.assign(results, answer);
2225
- }
2226
- }
2227
-
2228
- return results;
2229
- }
2230
-
2231
- /**
2232
- * Determine if a secret classification's stages apply to the current generation context.
2233
- * Build-time secrets apply when the project involves a Docker build step.
2234
- * Runtime secrets apply when the architecture uses HuggingFace Hub models.
2235
- * Requirements: 8.9
2236
- * @param {object} classification - Secret classification entry
2237
- * @param {object} answers - Current answers from previous phases
2238
- * @returns {boolean} True if the secret type is applicable
2239
- * @private
2240
- */
2241
- _secretStagesApply(classification, answers) {
2242
- const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
2243
- const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
2244
-
2245
- if (classification.identifier === 'hf-token') {
2246
- // HF token applies to transformers, diffusors, and Triton LLM backends
2247
- const isTransformers = architecture === 'transformers';
2248
- const isDiffusors = architecture === 'diffusors';
2249
- const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
2250
-
2251
- if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
2252
-
2253
- // Skip for non-HuggingFace model sources
2254
- const modelSource = answers.modelSource;
2255
- if (modelSource && modelSource !== 'huggingface') return false;
2256
-
2257
- return true;
2258
- }
2259
-
2260
- if (classification.identifier === 'ngc-token') {
2261
- // NGC token only applies to transformers-tensorrt-llm (build-time only)
2262
- if (architecture === 'triton') return false;
2263
- if (architecture === 'diffusors') return false;
2264
- return architecture === 'transformers' && backend === 'tensorrt-llm';
2265
- }
2266
-
2267
- // For future secret types, check if any stage applies
2268
- // Build-time applies to all Docker-based deployments
2269
- // Runtime applies to architectures that download at startup
2270
- return classification.stages.length > 0;
2271
- }
2272
-
2273
- /**
2274
- * Get the ARN config key for a classification.
2275
- * Maps classification identifiers to config field names.
2276
- * @param {object} classification - Secret classification entry
2277
- * @returns {string} Config key for the ARN value
2278
- * @private
2279
- */
2280
- _getArnConfigKey(classification) {
2281
- const keyMap = {
2282
- 'hf-token': 'hfTokenArn',
2283
- 'ngc-token': 'ngcTokenArn'
2284
- };
2285
- return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
2286
- }
2287
-
2288
- /**
2289
- * Get the plaintext config key for a classification.
2290
- * Maps classification identifiers to config field names.
2291
- * @param {object} classification - Secret classification entry
2292
- * @returns {string} Config key for the plaintext value
2293
- * @private
2294
- */
2295
- _getPlaintextConfigKey(classification) {
2296
- const keyMap = {
2297
- 'hf-token': 'hfToken',
2298
- 'ngc-token': 'ngcApiKey'
2299
- };
2300
- return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
2301
- }
2302
-
2303
- /**
2304
- * List managed secrets of a given type from AWS Secrets Manager.
2305
- * Uses the active bootstrap profile to query for secrets tagged with
2306
- * the mlcc:secret-type matching the given identifier.
2307
- * @param {string} secretType - The secret type identifier (e.g., 'hf-token')
2308
- * @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
2309
- * @private
2310
- */
2311
- async _listManagedSecrets(secretType) {
2312
- try {
2313
- const bootstrapConfig = new BootstrapConfig();
2314
- const activeProfile = bootstrapConfig.getActiveProfile();
2315
- if (!activeProfile) return [];
2316
-
2317
- const profile = activeProfile.config.awsProfile;
2318
- const region = activeProfile.config.awsRegion;
2319
- if (!profile || !region) return [];
2320
-
2321
- const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
2322
- const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
2323
- const trimmed = output.trim();
2324
- if (!trimmed) return [];
2325
-
2326
- const result = JSON.parse(trimmed);
2327
- const secrets = result.SecretList || [];
2328
-
2329
- // Filter by secret type tag
2330
- return secrets
2331
- .filter(secret => {
2332
- const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
2333
- return typeTag && typeTag.Value === secretType;
2334
- })
2335
- .map(secret => ({
2336
- name: secret.Name,
2337
- arn: secret.ARN
2338
- }));
2339
- } catch {
2340
- // If AWS CLI fails (not configured, no credentials, etc.), return empty
2341
- return [];
2342
- }
2343
- }
2344
-
2345
- /**
2346
- * Display a selection list for managed secrets of a given type.
2347
- * Shows available secrets plus options for plaintext entry and skip.
2348
- * Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
2349
- * @param {object} classification - Secret classification entry
2350
- * @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
2351
- * @param {object} previousAnswers - Answers from previous phases
2352
- * @returns {Promise<object>} Object with the selected value keyed by config field name
2353
- * @private
2354
- */
2355
- async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
2356
- const arnConfigKey = this._getArnConfigKey(classification);
2357
-
2358
- console.log(`\n🔐 ${classification.displayName}`);
2359
- console.log(` ${classification.purpose}`);
2360
-
2361
- // Build choices: managed secrets + enter plaintext + skip
2362
- const choices = [
2363
- ...managedSecrets.map(secret => ({
2364
- name: `🔒 ${secret.name} (${secret.arn})`,
2365
- value: secret.arn,
2366
- short: secret.name
2367
- })),
2368
- { name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
2369
- { name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
2370
- ];
2371
-
2372
- const { secretSelection } = await this._runPrompts([{
2373
- type: 'list',
2374
- name: 'secretSelection',
2375
- message: `Select ${classification.promptLabel}:`,
2376
- choices
2377
- }]);
2378
-
2379
- if (secretSelection === '__skip__') {
2380
- return {};
2381
- }
2382
-
2383
- if (secretSelection === '__plaintext__') {
2384
- // Use existing plaintext flow
2385
- return this._promptPlaintextEntry(classification, previousAnswers);
2386
- }
2387
1185
 
2388
- // User selected a managed secret ARN
2389
- return { [arnConfigKey]: secretSelection };
2390
- }
2391
-
2392
- /**
2393
- * Prompt for plaintext token entry with ARN detection.
2394
- * If the user enters an ARN, store it as an ARN reference.
2395
- * Requirements: 8.4, 8.5, 8.6
2396
- * @param {object} classification - Secret classification entry
2397
- * @param {object} previousAnswers - Answers from previous phases
2398
- * @returns {Promise<object>} Object with the value keyed by config field name
2399
- * @private
2400
- */
2401
- async _promptPlaintextEntry(classification, _previousAnswers) {
2402
- const arnConfigKey = this._getArnConfigKey(classification);
2403
- const plaintextConfigKey = this._getPlaintextConfigKey(classification);
2404
-
2405
- const { tokenValue } = await this._runPrompts([{
2406
- type: 'input',
2407
- name: 'tokenValue',
2408
- message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
2409
- validate: (input) => {
2410
- // Empty is valid
2411
- if (!input || input.trim() === '') return true;
2412
- // Environment variable reference is valid
2413
- if (input.trim().startsWith('$')) return true;
2414
- return true;
2415
- }
2416
- }]);
2417
-
2418
- if (!tokenValue || tokenValue.trim() === '') {
2419
- return {};
2420
- }
2421
-
2422
- const value = tokenValue.trim();
2423
-
2424
- // ARN detection: if the value is a Secrets Manager ARN, store as ARN
2425
- if (isSecretsManagerArn(value)) {
2426
- return { [arnConfigKey]: value };
2427
- }
2428
-
2429
- // Otherwise store as plaintext
2430
- return { [plaintextConfigKey]: value };
2431
- }
2432
-
2433
- /**
2434
- * Fall back to existing plaintext prompt when no managed secrets exist.
2435
- * Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
2436
- * but with ARN detection on the input.
2437
- * Requirements: 8.7
2438
- * @param {object} classification - Secret classification entry
2439
- * @param {object} previousAnswers - Answers from previous phases
2440
- * @param {object} explicitConfig - Explicit CLI/config values
2441
- * @param {object} existingConfig - Existing project configuration
2442
- * @returns {Promise<object>} Object with the value keyed by config field name
2443
- * @private
2444
- */
2445
- async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
2446
- const arnConfigKey = this._getArnConfigKey(classification);
2447
- const plaintextConfigKey = this._getPlaintextConfigKey(classification);
2448
-
2449
- // If in auto-prompt mode, skip
2450
- if (this.configManager?.isAutoPrompt()) {
2451
- return {};
2452
- }
2453
-
2454
- // Display context-appropriate security message
2455
- if (classification.identifier === 'hf-token') {
2456
- console.log('\n🔐 HuggingFace Authentication');
2457
- console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
2458
- console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
2459
- console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
2460
- console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
2461
- } else if (classification.identifier === 'ngc-token') {
2462
- console.log('\n🔐 NVIDIA NGC Authentication');
2463
- console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
2464
- console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
2465
- console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
2466
- console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
2467
- } else {
2468
- console.log(`\n🔐 ${classification.displayName}`);
2469
- console.log(` ${classification.purpose}\n`);
2470
- }
2471
-
2472
- const { tokenValue } = await this._runPrompts([{
2473
- type: 'input',
2474
- name: 'tokenValue',
2475
- message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
2476
- validate: (input) => {
2477
- if (!input || input.trim() === '') return true;
2478
- if (input.trim().startsWith('$')) return true;
2479
- // Warn about HF token format
2480
- if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
2481
- console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
2482
- console.warn(' If this is intentional, you can ignore this warning.');
2483
- }
2484
- return true;
2485
- }
2486
- }]);
2487
-
2488
- if (!tokenValue || tokenValue.trim() === '') {
2489
- return {};
2490
- }
2491
-
2492
- const value = tokenValue.trim();
2493
-
2494
- // ARN detection: if the value is a Secrets Manager ARN, store as ARN
2495
- if (isSecretsManagerArn(value)) {
2496
- return { [arnConfigKey]: value };
2497
- }
2498
-
2499
- // Otherwise store as plaintext
2500
- return { [plaintextConfigKey]: value };
2501
- }
2502
-
2503
- /**
2504
- * CUDA-to-AMI mapping.
2505
- * Maps CUDA major.minor versions to the SageMaker inference AMI that provides
2506
- * the matching CUDA driver. Derived from the framework registry patterns.
2507
- * @private
2508
- */
2509
- static CUDA_AMI_MAP = {
2510
- '11.0': 'al2-ami-sagemaker-inference-gpu-2',
2511
- '11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
2512
- '11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
2513
- '12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
2514
- '12.2': 'al2-ami-sagemaker-inference-gpu-3-1',
2515
- '12.4': 'al2-ami-sagemaker-inference-gpu-3-1',
2516
- '12.6': 'al2-ami-sagemaker-inference-gpu-3-1',
2517
- '13.0': 'al2023-ami-sagemaker-inference-gpu-4-1'
2518
- };
2519
-
2520
- /**
2521
- * Prompt the user to select a CUDA version when the selected GPU instance
2522
- * supports multiple versions. The choice transparently resolves to the
2523
- * correct SageMaker inference AMI.
2524
- *
2525
- * When a base image CUDA version is provided, auto-resolves by intersecting
2526
- * with the instance's supported versions. Removes the CUDA prompt from the
2527
- * interactive flow when auto-resolution succeeds.
2528
- *
2529
- * Skipped for CPU instances, non-CUDA accelerators, or when only one
2530
- * compatible CUDA version exists.
2531
- *
2532
- * @param {string} instanceType - Selected instance type (e.g. "ml.g5.2xlarge")
2533
- * @param {string} framework - Selected framework name
2534
- * @param {string} frameworkVersion - Selected framework version
2535
- * @param {string} [baseImageCuda] - CUDA version from selected base image (for auto-resolution)
2536
- * @returns {Promise<{cudaVersion: string, inferenceAmiVersion: string}|null>}
2537
- * @private
2538
- */
2539
- async _promptCudaVersion(instanceType, framework, frameworkVersion, baseImageCuda) {
2540
- if (!instanceType) return null;
2541
-
2542
- // Look up instance in accelerator mapping
2543
- const instanceInfo = this._instanceAcceleratorMapping[instanceType];
2544
- if (!instanceInfo || instanceInfo.accelerator.type !== 'cuda') return null;
2545
-
2546
- const instanceCudaVersions = instanceInfo.accelerator.versions;
2547
- if (!instanceCudaVersions || instanceCudaVersions.length === 0) return null;
2548
-
2549
- // Auto-resolution: when base image specifies a CUDA version, intersect with instance support
2550
- // Requirements: 3.11, 4.9, 4.10, 4.11
2551
- if (baseImageCuda) {
2552
- const majorRequired = baseImageCuda.split('.')[0];
2553
- const intersection = instanceCudaVersions.filter(v => {
2554
- if (v === baseImageCuda) return true;
2555
- if (v.startsWith(`${majorRequired }.`)) return true;
2556
- return false;
2557
- });
2558
-
2559
- if (intersection.length > 0) {
2560
- // Auto-select: pick exact match or highest compatible
2561
- const exactMatch = intersection.find(v => v === baseImageCuda);
2562
- const selectedVersion = exactMatch || intersection.sort().pop();
2563
- const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[selectedVersion];
2564
- if (inferenceAmiVersion) {
2565
- console.log(`\n🔧 CUDA ${selectedVersion} auto-resolved from base image (requires ${baseImageCuda})`);
2566
- console.log(` AMI: ${inferenceAmiVersion}`);
2567
- return { cudaVersion: selectedVersion, inferenceAmiVersion };
2568
- }
2569
- } else {
2570
- // No intersection — warn and fall through to manual prompt
2571
- console.log(`\n ⚠️ Base image requires CUDA ${baseImageCuda} but instance ${instanceType} supports: ${instanceCudaVersions.join(', ')}`);
2572
- console.log(' No compatible CUDA version found. Falling back to manual selection.');
2573
- }
2574
- }
2575
-
2576
- // Get framework CUDA requirements (if available)
2577
- const registryConfigManager = this.registryConfigManager;
2578
- const frameworkConfig = registryConfigManager?.frameworkRegistry?.[framework]?.[frameworkVersion];
2579
- const frameworkAccel = frameworkConfig?.accelerator;
2580
-
2581
- // Compute compatible CUDA versions: intersection of instance support and framework range
2582
- let compatibleVersions;
2583
- if (frameworkAccel?.versionRange) {
2584
- const { min, max } = frameworkAccel.versionRange;
2585
- compatibleVersions = instanceCudaVersions.filter(v => {
2586
- return v >= min && v <= max;
2587
- });
2588
- } else {
2589
- compatibleVersions = [...instanceCudaVersions];
2590
- }
2591
-
2592
- if (compatibleVersions.length === 0) {
2593
- // No overlap — fall back to all instance versions (validation already warned)
2594
- compatibleVersions = [...instanceCudaVersions];
2595
- }
2596
-
2597
- // If only one option, auto-select it silently
2598
- if (compatibleVersions.length === 1) {
2599
- const cudaVersion = compatibleVersions[0];
2600
- const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[cudaVersion];
2601
- if (inferenceAmiVersion) {
2602
- console.log(`\n🔧 CUDA ${cudaVersion} auto-selected (only compatible version for ${instanceType})`);
2603
- console.log(` AMI: ${inferenceAmiVersion}`);
2604
- }
2605
- return inferenceAmiVersion ? { cudaVersion, inferenceAmiVersion } : null;
2606
- }
2607
-
2608
- // Multiple options — let the user choose (or auto-select in auto-prompt mode)
2609
- const defaultVersion = frameworkAccel?.version
2610
- && compatibleVersions.includes(frameworkAccel.version)
2611
- ? frameworkAccel.version
2612
- : instanceInfo.accelerator.default || compatibleVersions[compatibleVersions.length - 1];
2613
-
2614
- // In auto-prompt mode, auto-select the default without prompting
2615
- if (this.configManager?.isAutoPrompt()) {
2616
- const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[defaultVersion];
2617
- if (inferenceAmiVersion) {
2618
- console.log(`\n🔧 CUDA ${defaultVersion} auto-selected (auto-prompt mode)`);
2619
- console.log(` AMI: ${inferenceAmiVersion}`);
2620
- }
2621
- return inferenceAmiVersion ? { cudaVersion: defaultVersion, inferenceAmiVersion } : null;
2622
- }
2623
-
2624
- const choices = compatibleVersions.map(v => {
2625
- const ami = PromptRunner.CUDA_AMI_MAP[v] || 'unknown';
2626
- const isDefault = v === defaultVersion ? ' (recommended)' : '';
2627
- return {
2628
- name: `CUDA ${v}${isDefault} → AMI: ${ami}`,
2629
- value: v,
2630
- short: `CUDA ${v}`
2631
- };
2632
- });
2633
-
2634
- const { cudaVersion } = await this._runPrompts([{
2635
- type: 'list',
2636
- name: 'cudaVersion',
2637
- message: `Select CUDA version for ${instanceType} (${instanceInfo.accelerator.hardware}):`,
2638
- choices,
2639
- default: defaultVersion
2640
- }]);
2641
-
2642
- const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[cudaVersion];
2643
- if (inferenceAmiVersion) {
2644
- console.log(` ✅ CUDA ${cudaVersion} → AMI: ${inferenceAmiVersion}`);
2645
- }
2646
-
2647
- return inferenceAmiVersion ? { cudaVersion, inferenceAmiVersion } : null;
2648
- }
2649
1186
  }
2650
1187