@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -46,6 +46,33 @@
46
46
  "minVramGb": 184,
47
47
  "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
48
48
  },
49
+ "meta-llama/Llama-3.1-8B*": {
50
+ "parameterCount": 8030261248,
51
+ "defaultDtype": "bfloat16",
52
+ "architecture": "LlamaForCausalLM",
53
+ "maxPositionEmbeddings": 131072,
54
+ "recommendedQuantizations": ["awq", "gptq"],
55
+ "minVramGb": 20,
56
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
57
+ },
58
+ "meta-llama/Llama-3.2-1B*": {
59
+ "parameterCount": 1235814400,
60
+ "defaultDtype": "bfloat16",
61
+ "architecture": "LlamaForCausalLM",
62
+ "maxPositionEmbeddings": 131072,
63
+ "recommendedQuantizations": ["awq", "gptq"],
64
+ "minVramGb": 5,
65
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
66
+ },
67
+ "meta-llama/Llama-3.2-3B*": {
68
+ "parameterCount": 3212749824,
69
+ "defaultDtype": "bfloat16",
70
+ "architecture": "LlamaForCausalLM",
71
+ "maxPositionEmbeddings": 131072,
72
+ "recommendedQuantizations": ["awq", "gptq"],
73
+ "minVramGb": 9,
74
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
75
+ },
49
76
  "mistralai/Mistral-7B*": {
50
77
  "parameterCount": 7241732096,
51
78
  "defaultDtype": "bfloat16",
@@ -532,6 +532,77 @@
532
532
  "text-generation"
533
533
  ]
534
534
  },
535
+ "meta-llama/Llama-3.1-8B*": {
536
+ "parameterCount": 8030261248,
537
+ "defaultDtype": "bfloat16",
538
+ "architecture": "LlamaForCausalLM",
539
+ "maxPositionEmbeddings": 131072,
540
+ "recommendedQuantizations": [
541
+ "awq",
542
+ "gptq"
543
+ ],
544
+ "modelType": "transformer",
545
+ "tasks": [
546
+ "text-generation"
547
+ ]
548
+ },
549
+ "meta-llama/Llama-3.1-70B*": {
550
+ "parameterCount": 70553706496,
551
+ "defaultDtype": "bfloat16",
552
+ "architecture": "LlamaForCausalLM",
553
+ "maxPositionEmbeddings": 131072,
554
+ "recommendedQuantizations": [
555
+ "awq",
556
+ "gptq"
557
+ ],
558
+ "modelType": "transformer",
559
+ "tasks": [
560
+ "text-generation"
561
+ ]
562
+ },
563
+ "meta-llama/Llama-3.1-405B*": {
564
+ "parameterCount": 405000000000,
565
+ "defaultDtype": "bfloat16",
566
+ "architecture": "LlamaForCausalLM",
567
+ "maxPositionEmbeddings": 131072,
568
+ "recommendedQuantizations": [
569
+ "awq",
570
+ "gptq",
571
+ "fp8"
572
+ ],
573
+ "modelType": "transformer",
574
+ "tasks": [
575
+ "text-generation"
576
+ ]
577
+ },
578
+ "meta-llama/Llama-3.2-1B*": {
579
+ "parameterCount": 1235814400,
580
+ "defaultDtype": "bfloat16",
581
+ "architecture": "LlamaForCausalLM",
582
+ "maxPositionEmbeddings": 131072,
583
+ "recommendedQuantizations": [
584
+ "awq",
585
+ "gptq"
586
+ ],
587
+ "modelType": "transformer",
588
+ "tasks": [
589
+ "text-generation"
590
+ ]
591
+ },
592
+ "meta-llama/Llama-3.2-3B*": {
593
+ "parameterCount": 3212749824,
594
+ "defaultDtype": "bfloat16",
595
+ "architecture": "LlamaForCausalLM",
596
+ "maxPositionEmbeddings": 131072,
597
+ "recommendedQuantizations": [
598
+ "awq",
599
+ "gptq"
600
+ ],
601
+ "modelType": "transformer",
602
+ "tasks": [
603
+ "text-generation"
604
+ ]
605
+ },
535
606
  "Qwen/Qwen-7B*": {
536
607
  "parameterCount": 7721324544,
537
608
  "defaultDtype": "bfloat16",
@@ -62,7 +62,15 @@
62
62
  }
63
63
  },
64
64
  "inferenceAmiVersion": {
65
- "type": "string"
65
+ "type": "string",
66
+ "enum": [
67
+ "al2023-ami-sagemaker-inference-cpu-0",
68
+ "al2-ami-sagemaker-inference-gpu-2",
69
+ "al2-ami-sagemaker-inference-gpu-2-1",
70
+ "al2-ami-sagemaker-inference-neuron-2",
71
+ "al2-ami-sagemaker-inference-gpu-3-1",
72
+ "al2023-ami-sagemaker-inference-gpu-4-1"
73
+ ]
66
74
  }
67
75
  },
68
76
  "additionalProperties": false
package/src/app.js CHANGED
@@ -302,6 +302,22 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
302
302
  ignorePatterns.push('**/hyperpod/**');
303
303
  }
304
304
 
305
+ // HyperPod is kubectl-based — no shared bash helpers or IC configs
306
+ if (answers.deploymentTarget === 'hyperpod-eks') {
307
+ ignorePatterns.push('**/do/lib/**');
308
+ ignorePatterns.push('**/do/ic/**');
309
+ ignorePatterns.push('**/do/add-ic');
310
+ ignorePatterns.push('**/do/status');
311
+ ignorePatterns.push('**/do/optimize');
312
+ }
313
+
314
+ // Async and batch don't use inference components (IC is real-time only)
315
+ if (answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform') {
316
+ ignorePatterns.push('**/do/ic/**');
317
+ ignorePatterns.push('**/do/add-ic');
318
+ ignorePatterns.push('**/do/status');
319
+ }
320
+
305
321
  // Resolve architecture
306
322
  const resolver = new DeploymentConfigResolver();
307
323
  let architecture = answers.architecture;
@@ -322,6 +338,24 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
322
338
  ignorePatterns.push('**/sample_model/**');
323
339
  }
324
340
 
341
+ // Exclude do/benchmark when benchmarking is not selected
342
+ if (!answers.includeBenchmark) {
343
+ ignorePatterns.push('**/do/benchmark');
344
+ ignorePatterns.push('**/do/optimize');
345
+ }
346
+
347
+ // Exclude do/adapter and do/adapters/ when LoRA is not enabled
348
+ if (!answers.enableLora) {
349
+ ignorePatterns.push('**/do/adapter');
350
+ ignorePatterns.push('**/do/adapters/**');
351
+ }
352
+
353
+ // Exclude do/test when hosted-model-endpoint is not selected
354
+ const testTypes = answers.testTypes || [];
355
+ if (!testTypes.includes('hosted-model-endpoint')) {
356
+ ignorePatterns.push('**/do/test');
357
+ }
358
+
325
359
  // Always exclude triton and diffusors source directories
326
360
  ignorePatterns.push('**/triton/**');
327
361
  ignorePatterns.push('**/diffusors/**');
@@ -417,6 +451,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
417
451
  _copyFile(path.join(LIB_DIR, 'manifest-cli.js'), path.join(doLibDir, 'manifest-cli.js'));
418
452
  _copyFile(path.join(LIB_DIR, 'asset-manager.js'), path.join(doLibDir, 'asset-manager.js'));
419
453
  _copyFile(path.join(LIB_DIR, 'bootstrap-config.js'), path.join(doLibDir, 'bootstrap-config.js'));
454
+
455
+ // Generate .gitignore with benchmarks/ when benchmarking is enabled
456
+ if (answers.includeBenchmark) {
457
+ const gitignorePath = path.join(destDir, '.gitignore');
458
+ const gitignoreContent = '# Benchmark results (generated by do/benchmark)\nbenchmarks/\n';
459
+ if (fs.existsSync(gitignorePath)) {
460
+ const existing = fs.readFileSync(gitignorePath, 'utf8');
461
+ if (!existing.includes('benchmarks/')) {
462
+ fs.appendFileSync(gitignorePath, `\n${gitignoreContent}`);
463
+ }
464
+ } else {
465
+ fs.writeFileSync(gitignorePath, gitignoreContent);
466
+ }
467
+ }
420
468
  }
421
469
 
422
470
  /**
@@ -529,7 +577,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
529
577
  codebuildProjectName: null,
530
578
  modelName: null,
531
579
  modelFormat: null,
532
- includeSampleModel: false,
580
+ includeSampleModel: true,
533
581
  includeTesting: true,
534
582
  testTypes: [],
535
583
  buildTimestamp: new Date().toISOString(),
@@ -542,7 +590,11 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
542
590
  baseImage: null,
543
591
  modelSource: 'huggingface',
544
592
  artifactUri: '',
545
- modelLoadStrategy: 'runtime'
593
+ modelLoadStrategy: 'runtime',
594
+ existingEndpointName: null,
595
+ enableLora: false,
596
+ maxLoras: 30,
597
+ maxLoraRank: 64
546
598
  };
547
599
 
548
600
  Object.entries(defaults).forEach(([key, value]) => {
@@ -641,6 +693,55 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
641
693
  // Silently continue - defaults are already set
642
694
  }
643
695
  }
696
+
697
+ // Populate baseImage from the catalog when still falsy (covers --skip-prompts and
698
+ // cases where MCP/CLI/config did not provide a base image).
699
+ // Precedence: MCP > CLI > config > catalog default (this block).
700
+ if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
701
+ const backendKey = answers.backend || answers.modelServer;
702
+ if (backendKey) {
703
+ const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
704
+ if (frameworkVersions) {
705
+ let resolvedConfig = null;
706
+ if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
707
+ resolvedConfig = frameworkVersions[answers.frameworkVersion];
708
+ } else {
709
+ // Fall back to latest version
710
+ const versions = Object.keys(frameworkVersions).sort((a, b) =>
711
+ b.localeCompare(a, undefined, { numeric: true })
712
+ );
713
+ if (versions.length > 0) {
714
+ resolvedConfig = frameworkVersions[versions[0]];
715
+ }
716
+ }
717
+ if (resolvedConfig?.baseImage) {
718
+ answers.baseImage = resolvedConfig.baseImage;
719
+ }
720
+ }
721
+ }
722
+ }
723
+
724
+ // Populate icGpuCount from instance catalog when not explicitly set.
725
+ // The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
726
+ // so it must always have a value for GPU deployments.
727
+ if (answers.icGpuCount == null && answers.instanceType) {
728
+ // Use gpuCount from instance-sizer recommendation if available
729
+ if (answers.gpuCount) {
730
+ answers.icGpuCount = answers.gpuCount;
731
+ } else {
732
+ // Look up from instances catalog
733
+ try {
734
+ const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
735
+ const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
736
+ const instanceInfo = catalogData?.catalog?.[answers.instanceType];
737
+ if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
738
+ answers.icGpuCount = instanceInfo.gpus;
739
+ }
740
+ } catch {
741
+ // Silently continue — template fallback handles missing value
742
+ }
743
+ }
744
+ }
644
745
  }
645
746
 
646
747
  /**
@@ -977,7 +1078,12 @@ function _setExecutablePermissions(destDir) {
977
1078
  'do/submit',
978
1079
  'do/register',
979
1080
  'do/ci',
980
- 'do/manifest'
1081
+ 'do/manifest',
1082
+ 'do/benchmark',
1083
+ 'do/optimize',
1084
+ 'do/status',
1085
+ 'do/add-ic',
1086
+ 'do/adapter'
981
1087
  ];
982
1088
 
983
1089
  shellScripts.forEach(script => {
@@ -116,6 +116,11 @@ export default class BootstrapCommandHandler {
116
116
 
117
117
  console.log('\n🚀 Bootstrap — Shared AWS Infrastructure Setup\n');
118
118
 
119
+ // Verify AWS CLI v2 is installed
120
+ if (!this._verifyCliV2()) {
121
+ return;
122
+ }
123
+
119
124
  // Determine bootstrap profile name
120
125
  let profileName;
121
126
  if (nonInteractive) {
@@ -194,6 +199,9 @@ export default class BootstrapCommandHandler {
194
199
  if (stackOutputs.BatchS3BucketName) {
195
200
  profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
196
201
  }
202
+ if (stackOutputs.BenchmarkS3BucketName) {
203
+ profileData.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
204
+ }
197
205
 
198
206
  console.log(' ✅ Bootstrap stack deployed successfully');
199
207
  } catch (error) {
@@ -382,6 +390,9 @@ export default class BootstrapCommandHandler {
382
390
  if (outputs.BatchS3BucketName) {
383
391
  console.log(` ✅ S3 bucket (batch): ${outputs.BatchS3BucketName}`);
384
392
  }
393
+ if (outputs.BenchmarkS3BucketName) {
394
+ console.log(` ✅ S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
395
+ }
385
396
  if (outputs.StackVersion) {
386
397
  console.log(` 📋 Stack version: ${outputs.StackVersion}`);
387
398
  }
@@ -452,6 +463,20 @@ export default class BootstrapCommandHandler {
452
463
  console.log(` ⚠️ S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
453
464
  }
454
465
  }
466
+
467
+ if (profile.config.benchmarkS3Bucket) {
468
+ try {
469
+ const benchmarkExists = this._resourceExists(
470
+ `s3api head-bucket --bucket ${profile.config.benchmarkS3Bucket}`,
471
+ profile.config.awsProfile
472
+ );
473
+ console.log(benchmarkExists
474
+ ? ` ✅ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
475
+ : ` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
476
+ } catch {
477
+ console.log(` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
478
+ }
479
+ }
455
480
  }
456
481
 
457
482
  // Display deployed resources from manifest
@@ -1005,6 +1030,7 @@ export default class BootstrapCommandHandler {
1005
1030
  if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
1006
1031
  if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
1007
1032
  if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
1033
+ if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
1008
1034
  profileConfig.stackName = stackName;
1009
1035
 
1010
1036
  console.log(' ✅ Bootstrap stack updated');
@@ -1278,11 +1304,28 @@ export default class BootstrapCommandHandler {
1278
1304
  'sagemaker:DescribeEndpointConfig',
1279
1305
  'sagemaker:DescribeModel',
1280
1306
  'sagemaker:DescribeInferenceComponent',
1307
+ 'sagemaker:ListInferenceComponents',
1281
1308
  'sagemaker:InvokeEndpoint',
1282
1309
  'sagemaker:InvokeEndpointAsync'
1283
1310
  ],
1284
1311
  Resource: '*'
1285
1312
  },
1313
+ {
1314
+ Sid: 'SageMakerBenchmarking',
1315
+ Effect: 'Allow',
1316
+ Action: [
1317
+ 'sagemaker:CreateAIBenchmarkJob',
1318
+ 'sagemaker:DescribeAIBenchmarkJob',
1319
+ 'sagemaker:ListAIBenchmarkJobs',
1320
+ 'sagemaker:StopAIBenchmarkJob',
1321
+ 'sagemaker:DeleteAIBenchmarkJob',
1322
+ 'sagemaker:CreateAIWorkloadConfig',
1323
+ 'sagemaker:DescribeAIWorkloadConfig',
1324
+ 'sagemaker:ListAIWorkloadConfigs',
1325
+ 'sagemaker:DeleteAIWorkloadConfig'
1326
+ ],
1327
+ Resource: '*'
1328
+ },
1286
1329
  {
1287
1330
  Sid: 'ECRPull',
1288
1331
  Effect: 'Allow',
@@ -1329,6 +1372,29 @@ export default class BootstrapCommandHandler {
1329
1372
  Effect: 'Allow',
1330
1373
  Action: 'sns:Publish',
1331
1374
  Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
1375
+ },
1376
+ {
1377
+ Sid: 'SecretsManagerBenchmark',
1378
+ Effect: 'Allow',
1379
+ Action: [
1380
+ 'secretsmanager:CreateSecret',
1381
+ 'secretsmanager:PutSecretValue',
1382
+ 'secretsmanager:GetSecretValue',
1383
+ 'secretsmanager:DescribeSecret'
1384
+ ],
1385
+ Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
1386
+ },
1387
+ {
1388
+ Sid: 'QuotaAndAvailability',
1389
+ Effect: 'Allow',
1390
+ Action: [
1391
+ 'service-quotas:GetServiceQuota',
1392
+ 'service-quotas:ListServiceQuotas',
1393
+ 'sagemaker:ListTrainingPlans',
1394
+ 'sagemaker:DescribeTrainingPlan',
1395
+ 'sagemaker:ListEndpoints'
1396
+ ],
1397
+ Resource: '*'
1332
1398
  }
1333
1399
  ]
1334
1400
  };
@@ -1478,9 +1544,15 @@ export default class BootstrapCommandHandler {
1478
1544
 
1479
1545
  /**
1480
1546
  * Optionally create S3 buckets for async/batch deployments.
1547
+ * Always creates the benchmark S3 bucket (unconditional).
1481
1548
  * @returns {Promise<object|null>} Bucket names or null if skipped
1482
1549
  */
1483
1550
  async _setupS3Buckets() {
1551
+ // Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
1552
+ const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
1553
+ const tags = this._buildResourceTags();
1554
+ const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
1555
+
1484
1556
  const { useS3 } = await this._promptFn([{
1485
1557
  type: 'confirm',
1486
1558
  name: 'useS3',
@@ -1489,17 +1561,16 @@ export default class BootstrapCommandHandler {
1489
1561
  }]);
1490
1562
 
1491
1563
  if (!useS3) {
1492
- return null;
1564
+ return { benchmarkS3Bucket };
1493
1565
  }
1494
1566
 
1495
1567
  const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
1496
1568
  const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
1497
1569
 
1498
- const tags = this._buildResourceTags();
1499
1570
  const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
1500
1571
  const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
1501
1572
 
1502
- return { asyncS3Bucket, batchS3Bucket };
1573
+ return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
1503
1574
  }
1504
1575
 
1505
1576
  /**
@@ -1556,6 +1627,28 @@ export default class BootstrapCommandHandler {
1556
1627
 
1557
1628
  // ── AWS CLI helpers ─────────────────────────────────────────────
1558
1629
 
1630
+ /**
1631
+ * Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
1632
+ * Extracted as a method so tests can override it.
1633
+ * @returns {boolean}
1634
+ */
1635
+ _verifyCliV2() {
1636
+ try {
1637
+ const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
1638
+ if (!versionOutput.includes('aws-cli/2')) {
1639
+ console.log(` ❌ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
1640
+ console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
1641
+ console.log(' Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
1642
+ return false;
1643
+ }
1644
+ return true;
1645
+ } catch {
1646
+ console.log(' ❌ AWS CLI not found.');
1647
+ console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
1648
+ return false;
1649
+ }
1650
+ }
1651
+
1559
1652
  /**
1560
1653
  * Execute an AWS CLI command and return parsed JSON output.
1561
1654
  * @param {string} command - AWS CLI command (without 'aws' prefix)
@@ -375,7 +375,7 @@ For more information, visit: https://github.com/awslabs/ml-container-creator
375
375
  type: 'confirm',
376
376
  name: 'includeSampleModel',
377
377
  message: 'Include sample model:',
378
- default: false,
378
+ default: true,
379
379
  when: answers => answers.framework !== 'transformers'
380
380
  },
381
381
  {
@@ -515,7 +515,7 @@ ml-container-creator \\
515
515
  'framework': 'sklearn',
516
516
  'modelServer': 'flask',
517
517
  'modelFormat': 'pkl',
518
- 'includeSampleModel': false,
518
+ 'includeSampleModel': true,
519
519
  'includeTesting': true,
520
520
  'testTypes': ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
521
521
  'buildTarget': 'codebuild',
@@ -979,6 +979,116 @@ export default class ConfigManager {
979
979
  default: 1.0,
980
980
  valueSpace: 'bounded',
981
981
  schemaValidated: true
982
+ },
983
+ includeBenchmark: {
984
+ cliOption: 'include-benchmark',
985
+ envVar: 'ML_INCLUDE_BENCHMARK',
986
+ configFile: true,
987
+ packageJson: false,
988
+ mcp: false,
989
+ promptable: true,
990
+ required: false,
991
+ default: false,
992
+ valueSpace: 'bounded'
993
+ },
994
+ benchmarkConcurrency: {
995
+ cliOption: 'benchmark-concurrency',
996
+ envVar: null,
997
+ configFile: true,
998
+ packageJson: false,
999
+ mcp: false,
1000
+ promptable: true,
1001
+ required: false,
1002
+ default: 10,
1003
+ valueSpace: 'bounded'
1004
+ },
1005
+ benchmarkInputTokensMean: {
1006
+ cliOption: 'benchmark-input-tokens',
1007
+ envVar: null,
1008
+ configFile: true,
1009
+ packageJson: false,
1010
+ mcp: false,
1011
+ promptable: true,
1012
+ required: false,
1013
+ default: 550,
1014
+ valueSpace: 'bounded'
1015
+ },
1016
+ benchmarkOutputTokensMean: {
1017
+ cliOption: 'benchmark-output-tokens',
1018
+ envVar: null,
1019
+ configFile: true,
1020
+ packageJson: false,
1021
+ mcp: false,
1022
+ promptable: true,
1023
+ required: false,
1024
+ default: 150,
1025
+ valueSpace: 'bounded'
1026
+ },
1027
+ benchmarkStreaming: {
1028
+ cliOption: 'benchmark-streaming',
1029
+ envVar: null,
1030
+ configFile: true,
1031
+ packageJson: false,
1032
+ mcp: false,
1033
+ promptable: true,
1034
+ required: false,
1035
+ default: true,
1036
+ valueSpace: 'bounded'
1037
+ },
1038
+ benchmarkRequestCount: {
1039
+ cliOption: 'benchmark-request-count',
1040
+ envVar: null,
1041
+ configFile: true,
1042
+ packageJson: false,
1043
+ mcp: false,
1044
+ promptable: true,
1045
+ required: false,
1046
+ default: null,
1047
+ valueSpace: 'bounded'
1048
+ },
1049
+ benchmarkS3OutputPath: {
1050
+ cliOption: 'benchmark-s3-output-path',
1051
+ envVar: 'ML_BENCHMARK_S3_OUTPUT_PATH',
1052
+ configFile: true,
1053
+ packageJson: false,
1054
+ mcp: false,
1055
+ promptable: true,
1056
+ required: false,
1057
+ default: null,
1058
+ valueSpace: 'bounded'
1059
+ },
1060
+ enableLora: {
1061
+ cliOption: 'enable-lora',
1062
+ envVar: null,
1063
+ configFile: true,
1064
+ packageJson: false,
1065
+ mcp: false,
1066
+ promptable: true,
1067
+ required: false,
1068
+ default: false,
1069
+ valueSpace: 'bounded'
1070
+ },
1071
+ maxLoras: {
1072
+ cliOption: 'max-loras',
1073
+ envVar: null,
1074
+ configFile: true,
1075
+ packageJson: false,
1076
+ mcp: false,
1077
+ promptable: true,
1078
+ required: false,
1079
+ default: 30,
1080
+ valueSpace: 'bounded'
1081
+ },
1082
+ maxLoraRank: {
1083
+ cliOption: 'max-lora-rank',
1084
+ envVar: null,
1085
+ configFile: true,
1086
+ packageJson: false,
1087
+ mcp: false,
1088
+ promptable: true,
1089
+ required: false,
1090
+ default: 64,
1091
+ valueSpace: 'bounded'
982
1092
  }
983
1093
  };
984
1094
  }
@@ -1011,7 +1121,7 @@ export default class ConfigManager {
1011
1121
  */
1012
1122
  _parseValue(parameter, value) {
1013
1123
  // Handle boolean parameters
1014
- if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts') {
1124
+ if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts' || parameter === 'includeBenchmark' || parameter === 'benchmarkStreaming' || parameter === 'enableLora') {
1015
1125
  return value === true || value === 'true';
1016
1126
  }
1017
1127
 
@@ -1847,6 +1957,12 @@ export default class ConfigManager {
1847
1957
  if (param === 'instanceType' && finalConfig.deploymentTarget === 'hyperpod-eks' && !finalConfig.instanceType) {
1848
1958
  return; // Skip validation only if truly missing for backward compat
1849
1959
  }
1960
+
1961
+ // Special case: instanceType is not required when attaching to an existing endpoint
1962
+ // The instance type is inherited from the existing endpoint configuration
1963
+ if (param === 'instanceType' && finalConfig.existingEndpointName) {
1964
+ return; // Skip validation — instance is inherited from existing endpoint
1965
+ }
1850
1966
 
1851
1967
  if (isEmpty) {
1852
1968
  if (config.promptable) {
@@ -57,6 +57,22 @@ export default {
57
57
  },
58
58
  buildTarget: {
59
59
  type: ['string', 'null']
60
+ },
61
+ icList: {
62
+ type: 'array',
63
+ items: {
64
+ type: 'object',
65
+ required: ['name'],
66
+ properties: {
67
+ name: { type: 'string', minLength: 1 },
68
+ image: { type: 'string' },
69
+ gpuCount: { type: 'integer', minimum: 0 },
70
+ copyCount: { type: 'integer', minimum: 1 },
71
+ isAdapter: { type: 'boolean' },
72
+ baseIcName: { type: 'string' },
73
+ artifactUrl: { type: 'string' }
74
+ }
75
+ }
60
76
  }
61
77
  }
62
78
  },