@aws/ml-container-creator 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/config/bootstrap-stack.json +86 -7
- package/config/defaults.json +1 -1
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +3 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +42 -2
- package/servers/instance-sizer/lib/instance-ranker.js +114 -10
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +15 -15
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +71 -0
- package/servers/lib/schemas/image-catalog.schema.json +9 -1
- package/src/app.js +109 -3
- package/src/lib/bootstrap-command-handler.js +96 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +117 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +270 -12
- package/src/lib/prompts.js +288 -6
- package/src/lib/registry-command-handler.js +12 -0
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +125 -2
- package/templates/Dockerfile +22 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/serving.properties +14 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +718 -0
- package/templates/do/clean +593 -17
- package/templates/do/config +49 -4
- package/templates/do/deploy +513 -362
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +119 -2
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
- package/templates/triton/Dockerfile +5 -0
|
@@ -46,6 +46,33 @@
|
|
|
46
46
|
"minVramGb": 184,
|
|
47
47
|
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
|
|
48
48
|
},
|
|
49
|
+
"meta-llama/Llama-3.1-8B*": {
|
|
50
|
+
"parameterCount": 8030261248,
|
|
51
|
+
"defaultDtype": "bfloat16",
|
|
52
|
+
"architecture": "LlamaForCausalLM",
|
|
53
|
+
"maxPositionEmbeddings": 131072,
|
|
54
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
55
|
+
"minVramGb": 20,
|
|
56
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
57
|
+
},
|
|
58
|
+
"meta-llama/Llama-3.2-1B*": {
|
|
59
|
+
"parameterCount": 1235814400,
|
|
60
|
+
"defaultDtype": "bfloat16",
|
|
61
|
+
"architecture": "LlamaForCausalLM",
|
|
62
|
+
"maxPositionEmbeddings": 131072,
|
|
63
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
64
|
+
"minVramGb": 5,
|
|
65
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
66
|
+
},
|
|
67
|
+
"meta-llama/Llama-3.2-3B*": {
|
|
68
|
+
"parameterCount": 3212749824,
|
|
69
|
+
"defaultDtype": "bfloat16",
|
|
70
|
+
"architecture": "LlamaForCausalLM",
|
|
71
|
+
"maxPositionEmbeddings": 131072,
|
|
72
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
73
|
+
"minVramGb": 9,
|
|
74
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
75
|
+
},
|
|
49
76
|
"mistralai/Mistral-7B*": {
|
|
50
77
|
"parameterCount": 7241732096,
|
|
51
78
|
"defaultDtype": "bfloat16",
|
|
@@ -532,6 +532,77 @@
|
|
|
532
532
|
"text-generation"
|
|
533
533
|
]
|
|
534
534
|
},
|
|
535
|
+
"meta-llama/Llama-3.1-8B*": {
|
|
536
|
+
"parameterCount": 8030261248,
|
|
537
|
+
"defaultDtype": "bfloat16",
|
|
538
|
+
"architecture": "LlamaForCausalLM",
|
|
539
|
+
"maxPositionEmbeddings": 131072,
|
|
540
|
+
"recommendedQuantizations": [
|
|
541
|
+
"awq",
|
|
542
|
+
"gptq"
|
|
543
|
+
],
|
|
544
|
+
"modelType": "transformer",
|
|
545
|
+
"tasks": [
|
|
546
|
+
"text-generation"
|
|
547
|
+
]
|
|
548
|
+
},
|
|
549
|
+
"meta-llama/Llama-3.1-70B*": {
|
|
550
|
+
"parameterCount": 70553706496,
|
|
551
|
+
"defaultDtype": "bfloat16",
|
|
552
|
+
"architecture": "LlamaForCausalLM",
|
|
553
|
+
"maxPositionEmbeddings": 131072,
|
|
554
|
+
"recommendedQuantizations": [
|
|
555
|
+
"awq",
|
|
556
|
+
"gptq"
|
|
557
|
+
],
|
|
558
|
+
"modelType": "transformer",
|
|
559
|
+
"tasks": [
|
|
560
|
+
"text-generation"
|
|
561
|
+
]
|
|
562
|
+
},
|
|
563
|
+
"meta-llama/Llama-3.1-405B*": {
|
|
564
|
+
"parameterCount": 405000000000,
|
|
565
|
+
"defaultDtype": "bfloat16",
|
|
566
|
+
"architecture": "LlamaForCausalLM",
|
|
567
|
+
"maxPositionEmbeddings": 131072,
|
|
568
|
+
"recommendedQuantizations": [
|
|
569
|
+
"awq",
|
|
570
|
+
"gptq",
|
|
571
|
+
"fp8"
|
|
572
|
+
],
|
|
573
|
+
"modelType": "transformer",
|
|
574
|
+
"tasks": [
|
|
575
|
+
"text-generation"
|
|
576
|
+
]
|
|
577
|
+
},
|
|
578
|
+
"meta-llama/Llama-3.2-1B*": {
|
|
579
|
+
"parameterCount": 1235814400,
|
|
580
|
+
"defaultDtype": "bfloat16",
|
|
581
|
+
"architecture": "LlamaForCausalLM",
|
|
582
|
+
"maxPositionEmbeddings": 131072,
|
|
583
|
+
"recommendedQuantizations": [
|
|
584
|
+
"awq",
|
|
585
|
+
"gptq"
|
|
586
|
+
],
|
|
587
|
+
"modelType": "transformer",
|
|
588
|
+
"tasks": [
|
|
589
|
+
"text-generation"
|
|
590
|
+
]
|
|
591
|
+
},
|
|
592
|
+
"meta-llama/Llama-3.2-3B*": {
|
|
593
|
+
"parameterCount": 3212749824,
|
|
594
|
+
"defaultDtype": "bfloat16",
|
|
595
|
+
"architecture": "LlamaForCausalLM",
|
|
596
|
+
"maxPositionEmbeddings": 131072,
|
|
597
|
+
"recommendedQuantizations": [
|
|
598
|
+
"awq",
|
|
599
|
+
"gptq"
|
|
600
|
+
],
|
|
601
|
+
"modelType": "transformer",
|
|
602
|
+
"tasks": [
|
|
603
|
+
"text-generation"
|
|
604
|
+
]
|
|
605
|
+
},
|
|
535
606
|
"Qwen/Qwen-7B*": {
|
|
536
607
|
"parameterCount": 7721324544,
|
|
537
608
|
"defaultDtype": "bfloat16",
|
|
@@ -62,7 +62,15 @@
|
|
|
62
62
|
}
|
|
63
63
|
},
|
|
64
64
|
"inferenceAmiVersion": {
|
|
65
|
-
"type": "string"
|
|
65
|
+
"type": "string",
|
|
66
|
+
"enum": [
|
|
67
|
+
"al2023-ami-sagemaker-inference-cpu-0",
|
|
68
|
+
"al2-ami-sagemaker-inference-gpu-2",
|
|
69
|
+
"al2-ami-sagemaker-inference-gpu-2-1",
|
|
70
|
+
"al2-ami-sagemaker-inference-neuron-2",
|
|
71
|
+
"al2-ami-sagemaker-inference-gpu-3-1",
|
|
72
|
+
"al2023-ami-sagemaker-inference-gpu-4-1"
|
|
73
|
+
]
|
|
66
74
|
}
|
|
67
75
|
},
|
|
68
76
|
"additionalProperties": false
|
package/src/app.js
CHANGED
|
@@ -302,6 +302,22 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
302
302
|
ignorePatterns.push('**/hyperpod/**');
|
|
303
303
|
}
|
|
304
304
|
|
|
305
|
+
// HyperPod is kubectl-based — no shared bash helpers or IC configs
|
|
306
|
+
if (answers.deploymentTarget === 'hyperpod-eks') {
|
|
307
|
+
ignorePatterns.push('**/do/lib/**');
|
|
308
|
+
ignorePatterns.push('**/do/ic/**');
|
|
309
|
+
ignorePatterns.push('**/do/add-ic');
|
|
310
|
+
ignorePatterns.push('**/do/status');
|
|
311
|
+
ignorePatterns.push('**/do/optimize');
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
// Async and batch don't use inference components (IC is real-time only)
|
|
315
|
+
if (answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform') {
|
|
316
|
+
ignorePatterns.push('**/do/ic/**');
|
|
317
|
+
ignorePatterns.push('**/do/add-ic');
|
|
318
|
+
ignorePatterns.push('**/do/status');
|
|
319
|
+
}
|
|
320
|
+
|
|
305
321
|
// Resolve architecture
|
|
306
322
|
const resolver = new DeploymentConfigResolver();
|
|
307
323
|
let architecture = answers.architecture;
|
|
@@ -322,6 +338,24 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
322
338
|
ignorePatterns.push('**/sample_model/**');
|
|
323
339
|
}
|
|
324
340
|
|
|
341
|
+
// Exclude do/benchmark and do/optimize when benchmarking is not selected
|
|
342
|
+
if (!answers.includeBenchmark) {
|
|
343
|
+
ignorePatterns.push('**/do/benchmark');
|
|
344
|
+
ignorePatterns.push('**/do/optimize');
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
// Exclude do/adapter and do/adapters/ when LoRA is not enabled
|
|
348
|
+
if (!answers.enableLora) {
|
|
349
|
+
ignorePatterns.push('**/do/adapter');
|
|
350
|
+
ignorePatterns.push('**/do/adapters/**');
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
// Exclude do/test when hosted-model-endpoint is not selected
|
|
354
|
+
const testTypes = answers.testTypes || [];
|
|
355
|
+
if (!testTypes.includes('hosted-model-endpoint')) {
|
|
356
|
+
ignorePatterns.push('**/do/test');
|
|
357
|
+
}
|
|
358
|
+
|
|
325
359
|
// Always exclude triton and diffusors source directories
|
|
326
360
|
ignorePatterns.push('**/triton/**');
|
|
327
361
|
ignorePatterns.push('**/diffusors/**');
|
|
@@ -417,6 +451,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
417
451
|
_copyFile(path.join(LIB_DIR, 'manifest-cli.js'), path.join(doLibDir, 'manifest-cli.js'));
|
|
418
452
|
_copyFile(path.join(LIB_DIR, 'asset-manager.js'), path.join(doLibDir, 'asset-manager.js'));
|
|
419
453
|
_copyFile(path.join(LIB_DIR, 'bootstrap-config.js'), path.join(doLibDir, 'bootstrap-config.js'));
|
|
454
|
+
|
|
455
|
+
// Generate .gitignore with benchmarks/ when benchmarking is enabled
|
|
456
|
+
if (answers.includeBenchmark) {
|
|
457
|
+
const gitignorePath = path.join(destDir, '.gitignore');
|
|
458
|
+
const gitignoreContent = '# Benchmark results (generated by do/benchmark)\nbenchmarks/\n';
|
|
459
|
+
if (fs.existsSync(gitignorePath)) {
|
|
460
|
+
const existing = fs.readFileSync(gitignorePath, 'utf8');
|
|
461
|
+
if (!existing.includes('benchmarks/')) {
|
|
462
|
+
fs.appendFileSync(gitignorePath, `\n${gitignoreContent}`);
|
|
463
|
+
}
|
|
464
|
+
} else {
|
|
465
|
+
fs.writeFileSync(gitignorePath, gitignoreContent);
|
|
466
|
+
}
|
|
467
|
+
}
|
|
420
468
|
}
|
|
421
469
|
|
|
422
470
|
/**
|
|
@@ -529,7 +577,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
|
|
|
529
577
|
codebuildProjectName: null,
|
|
530
578
|
modelName: null,
|
|
531
579
|
modelFormat: null,
|
|
532
|
-
includeSampleModel: false,
|
|
580
|
+
includeSampleModel: true,
|
|
533
581
|
includeTesting: true,
|
|
534
582
|
testTypes: [],
|
|
535
583
|
buildTimestamp: new Date().toISOString(),
|
|
@@ -542,7 +590,11 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
|
|
|
542
590
|
baseImage: null,
|
|
543
591
|
modelSource: 'huggingface',
|
|
544
592
|
artifactUri: '',
|
|
545
|
-
modelLoadStrategy: 'runtime'
|
|
593
|
+
modelLoadStrategy: 'runtime',
|
|
594
|
+
existingEndpointName: null,
|
|
595
|
+
enableLora: false,
|
|
596
|
+
maxLoras: 30,
|
|
597
|
+
maxLoraRank: 64
|
|
546
598
|
};
|
|
547
599
|
|
|
548
600
|
Object.entries(defaults).forEach(([key, value]) => {
|
|
@@ -641,6 +693,55 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
|
|
|
641
693
|
// Silently continue - defaults are already set
|
|
642
694
|
}
|
|
643
695
|
}
|
|
696
|
+
|
|
697
|
+
// Populate baseImage from the catalog when still falsy (covers --skip-prompts and
|
|
698
|
+
// cases where MCP/CLI/config did not provide a base image).
|
|
699
|
+
// Precedence: MCP > CLI > config > catalog default (this block).
|
|
700
|
+
if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
|
|
701
|
+
const backendKey = answers.backend || answers.modelServer;
|
|
702
|
+
if (backendKey) {
|
|
703
|
+
const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
|
|
704
|
+
if (frameworkVersions) {
|
|
705
|
+
let resolvedConfig = null;
|
|
706
|
+
if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
|
|
707
|
+
resolvedConfig = frameworkVersions[answers.frameworkVersion];
|
|
708
|
+
} else {
|
|
709
|
+
// Fall back to latest version
|
|
710
|
+
const versions = Object.keys(frameworkVersions).sort((a, b) =>
|
|
711
|
+
b.localeCompare(a, undefined, { numeric: true })
|
|
712
|
+
);
|
|
713
|
+
if (versions.length > 0) {
|
|
714
|
+
resolvedConfig = frameworkVersions[versions[0]];
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
if (resolvedConfig?.baseImage) {
|
|
718
|
+
answers.baseImage = resolvedConfig.baseImage;
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
// Populate icGpuCount from instance catalog when not explicitly set.
|
|
725
|
+
// The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
|
|
726
|
+
// so it must always have a value for GPU deployments.
|
|
727
|
+
if (answers.icGpuCount == null && answers.instanceType) {
|
|
728
|
+
// Use gpuCount from instance-sizer recommendation if available
|
|
729
|
+
if (answers.gpuCount) {
|
|
730
|
+
answers.icGpuCount = answers.gpuCount;
|
|
731
|
+
} else {
|
|
732
|
+
// Look up from instances catalog
|
|
733
|
+
try {
|
|
734
|
+
const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
|
|
735
|
+
const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
|
|
736
|
+
const instanceInfo = catalogData?.catalog?.[answers.instanceType];
|
|
737
|
+
if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
|
|
738
|
+
answers.icGpuCount = instanceInfo.gpus;
|
|
739
|
+
}
|
|
740
|
+
} catch {
|
|
741
|
+
// Silently continue — template fallback handles missing value
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
}
|
|
644
745
|
}
|
|
645
746
|
|
|
646
747
|
/**
|
|
@@ -977,7 +1078,12 @@ function _setExecutablePermissions(destDir) {
|
|
|
977
1078
|
'do/submit',
|
|
978
1079
|
'do/register',
|
|
979
1080
|
'do/ci',
|
|
980
|
-
'do/manifest'
|
|
1081
|
+
'do/manifest',
|
|
1082
|
+
'do/benchmark',
|
|
1083
|
+
'do/optimize',
|
|
1084
|
+
'do/status',
|
|
1085
|
+
'do/add-ic',
|
|
1086
|
+
'do/adapter'
|
|
981
1087
|
];
|
|
982
1088
|
|
|
983
1089
|
shellScripts.forEach(script => {
|
|
@@ -116,6 +116,11 @@ export default class BootstrapCommandHandler {
|
|
|
116
116
|
|
|
117
117
|
console.log('\n🚀 Bootstrap — Shared AWS Infrastructure Setup\n');
|
|
118
118
|
|
|
119
|
+
// Verify AWS CLI v2 is installed
|
|
120
|
+
if (!this._verifyCliV2()) {
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
123
|
+
|
|
119
124
|
// Determine bootstrap profile name
|
|
120
125
|
let profileName;
|
|
121
126
|
if (nonInteractive) {
|
|
@@ -194,6 +199,9 @@ export default class BootstrapCommandHandler {
|
|
|
194
199
|
if (stackOutputs.BatchS3BucketName) {
|
|
195
200
|
profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
196
201
|
}
|
|
202
|
+
if (stackOutputs.BenchmarkS3BucketName) {
|
|
203
|
+
profileData.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
|
|
204
|
+
}
|
|
197
205
|
|
|
198
206
|
console.log(' ✅ Bootstrap stack deployed successfully');
|
|
199
207
|
} catch (error) {
|
|
@@ -382,6 +390,9 @@ export default class BootstrapCommandHandler {
|
|
|
382
390
|
if (outputs.BatchS3BucketName) {
|
|
383
391
|
console.log(` ✅ S3 bucket (batch): ${outputs.BatchS3BucketName}`);
|
|
384
392
|
}
|
|
393
|
+
if (outputs.BenchmarkS3BucketName) {
|
|
394
|
+
console.log(` ✅ S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
|
|
395
|
+
}
|
|
385
396
|
if (outputs.StackVersion) {
|
|
386
397
|
console.log(` 📋 Stack version: ${outputs.StackVersion}`);
|
|
387
398
|
}
|
|
@@ -452,6 +463,20 @@ export default class BootstrapCommandHandler {
|
|
|
452
463
|
console.log(` ⚠️ S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
|
|
453
464
|
}
|
|
454
465
|
}
|
|
466
|
+
|
|
467
|
+
if (profile.config.benchmarkS3Bucket) {
|
|
468
|
+
try {
|
|
469
|
+
const benchmarkExists = this._resourceExists(
|
|
470
|
+
`s3api head-bucket --bucket ${profile.config.benchmarkS3Bucket}`,
|
|
471
|
+
profile.config.awsProfile
|
|
472
|
+
);
|
|
473
|
+
console.log(benchmarkExists
|
|
474
|
+
? ` ✅ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
|
|
475
|
+
: ` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
|
|
476
|
+
} catch {
|
|
477
|
+
console.log(` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
|
|
478
|
+
}
|
|
479
|
+
}
|
|
455
480
|
}
|
|
456
481
|
|
|
457
482
|
// Display deployed resources from manifest
|
|
@@ -1005,6 +1030,7 @@ export default class BootstrapCommandHandler {
|
|
|
1005
1030
|
if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
|
|
1006
1031
|
if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
|
|
1007
1032
|
if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
1033
|
+
if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
|
|
1008
1034
|
profileConfig.stackName = stackName;
|
|
1009
1035
|
|
|
1010
1036
|
console.log(' ✅ Bootstrap stack updated');
|
|
@@ -1278,11 +1304,28 @@ export default class BootstrapCommandHandler {
|
|
|
1278
1304
|
'sagemaker:DescribeEndpointConfig',
|
|
1279
1305
|
'sagemaker:DescribeModel',
|
|
1280
1306
|
'sagemaker:DescribeInferenceComponent',
|
|
1307
|
+
'sagemaker:ListInferenceComponents',
|
|
1281
1308
|
'sagemaker:InvokeEndpoint',
|
|
1282
1309
|
'sagemaker:InvokeEndpointAsync'
|
|
1283
1310
|
],
|
|
1284
1311
|
Resource: '*'
|
|
1285
1312
|
},
|
|
1313
|
+
{
|
|
1314
|
+
Sid: 'SageMakerBenchmarking',
|
|
1315
|
+
Effect: 'Allow',
|
|
1316
|
+
Action: [
|
|
1317
|
+
'sagemaker:CreateAIBenchmarkJob',
|
|
1318
|
+
'sagemaker:DescribeAIBenchmarkJob',
|
|
1319
|
+
'sagemaker:ListAIBenchmarkJobs',
|
|
1320
|
+
'sagemaker:StopAIBenchmarkJob',
|
|
1321
|
+
'sagemaker:DeleteAIBenchmarkJob',
|
|
1322
|
+
'sagemaker:CreateAIWorkloadConfig',
|
|
1323
|
+
'sagemaker:DescribeAIWorkloadConfig',
|
|
1324
|
+
'sagemaker:ListAIWorkloadConfigs',
|
|
1325
|
+
'sagemaker:DeleteAIWorkloadConfig'
|
|
1326
|
+
],
|
|
1327
|
+
Resource: '*'
|
|
1328
|
+
},
|
|
1286
1329
|
{
|
|
1287
1330
|
Sid: 'ECRPull',
|
|
1288
1331
|
Effect: 'Allow',
|
|
@@ -1329,6 +1372,29 @@ export default class BootstrapCommandHandler {
|
|
|
1329
1372
|
Effect: 'Allow',
|
|
1330
1373
|
Action: 'sns:Publish',
|
|
1331
1374
|
Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
|
|
1375
|
+
},
|
|
1376
|
+
{
|
|
1377
|
+
Sid: 'SecretsManagerBenchmark',
|
|
1378
|
+
Effect: 'Allow',
|
|
1379
|
+
Action: [
|
|
1380
|
+
'secretsmanager:CreateSecret',
|
|
1381
|
+
'secretsmanager:PutSecretValue',
|
|
1382
|
+
'secretsmanager:GetSecretValue',
|
|
1383
|
+
'secretsmanager:DescribeSecret'
|
|
1384
|
+
],
|
|
1385
|
+
Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
|
|
1386
|
+
},
|
|
1387
|
+
{
|
|
1388
|
+
Sid: 'QuotaAndAvailability',
|
|
1389
|
+
Effect: 'Allow',
|
|
1390
|
+
Action: [
|
|
1391
|
+
'service-quotas:GetServiceQuota',
|
|
1392
|
+
'service-quotas:ListServiceQuotas',
|
|
1393
|
+
'sagemaker:ListTrainingPlans',
|
|
1394
|
+
'sagemaker:DescribeTrainingPlan',
|
|
1395
|
+
'sagemaker:ListEndpoints'
|
|
1396
|
+
],
|
|
1397
|
+
Resource: '*'
|
|
1332
1398
|
}
|
|
1333
1399
|
]
|
|
1334
1400
|
};
|
|
@@ -1478,9 +1544,15 @@ export default class BootstrapCommandHandler {
|
|
|
1478
1544
|
|
|
1479
1545
|
/**
|
|
1480
1546
|
* Optionally create S3 buckets for async/batch deployments.
|
|
1547
|
+
* Always creates the benchmark S3 bucket (unconditional).
|
|
1481
1548
|
* @returns {Promise<object>} Bucket names; only benchmarkS3Bucket when the async/batch buckets are skipped
|
|
1482
1549
|
*/
|
|
1483
1550
|
async _setupS3Buckets() {
|
|
1551
|
+
// Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
|
|
1552
|
+
const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
|
|
1553
|
+
const tags = this._buildResourceTags();
|
|
1554
|
+
const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
|
|
1555
|
+
|
|
1484
1556
|
const { useS3 } = await this._promptFn([{
|
|
1485
1557
|
type: 'confirm',
|
|
1486
1558
|
name: 'useS3',
|
|
@@ -1489,17 +1561,16 @@ export default class BootstrapCommandHandler {
|
|
|
1489
1561
|
}]);
|
|
1490
1562
|
|
|
1491
1563
|
if (!useS3) {
|
|
1492
|
-
return null;
|
|
1564
|
+
return { benchmarkS3Bucket };
|
|
1493
1565
|
}
|
|
1494
1566
|
|
|
1495
1567
|
const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
|
|
1496
1568
|
const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
|
|
1497
1569
|
|
|
1498
|
-
const tags = this._buildResourceTags();
|
|
1499
1570
|
const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
|
|
1500
1571
|
const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
|
|
1501
1572
|
|
|
1502
|
-
return { asyncS3Bucket, batchS3Bucket };
|
|
1573
|
+
return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
|
|
1503
1574
|
}
|
|
1504
1575
|
|
|
1505
1576
|
/**
|
|
@@ -1556,6 +1627,28 @@ export default class BootstrapCommandHandler {
|
|
|
1556
1627
|
|
|
1557
1628
|
// ── AWS CLI helpers ─────────────────────────────────────────────
|
|
1558
1629
|
|
|
1630
|
+
/**
|
|
1631
|
+
* Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
|
|
1632
|
+
* Extracted as a method so tests can override it.
|
|
1633
|
+
* @returns {boolean}
|
|
1634
|
+
*/
|
|
1635
|
+
_verifyCliV2() {
|
|
1636
|
+
try {
|
|
1637
|
+
const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
1638
|
+
if (!versionOutput.includes('aws-cli/2')) {
|
|
1639
|
+
console.log(` ❌ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
|
|
1640
|
+
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
|
|
1641
|
+
console.log(' Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
|
|
1642
|
+
return false;
|
|
1643
|
+
}
|
|
1644
|
+
return true;
|
|
1645
|
+
} catch {
|
|
1646
|
+
console.log(' ❌ AWS CLI not found.');
|
|
1647
|
+
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
|
|
1648
|
+
return false;
|
|
1649
|
+
}
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1559
1652
|
/**
|
|
1560
1653
|
* Execute an AWS CLI command and return parsed JSON output.
|
|
1561
1654
|
* @param {string} command - AWS CLI command (without 'aws' prefix)
|
package/src/lib/cli-handler.js
CHANGED
|
@@ -375,7 +375,7 @@ For more information, visit: https://github.com/awslabs/ml-container-creator
|
|
|
375
375
|
type: 'confirm',
|
|
376
376
|
name: 'includeSampleModel',
|
|
377
377
|
message: 'Include sample model:',
|
|
378
|
-
default: false,
|
|
378
|
+
default: true,
|
|
379
379
|
when: answers => answers.framework !== 'transformers'
|
|
380
380
|
},
|
|
381
381
|
{
|
|
@@ -515,7 +515,7 @@ ml-container-creator \\
|
|
|
515
515
|
'framework': 'sklearn',
|
|
516
516
|
'modelServer': 'flask',
|
|
517
517
|
'modelFormat': 'pkl',
|
|
518
|
-
'includeSampleModel': false,
|
|
518
|
+
'includeSampleModel': true,
|
|
519
519
|
'includeTesting': true,
|
|
520
520
|
'testTypes': ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
|
|
521
521
|
'buildTarget': 'codebuild',
|
|
@@ -979,6 +979,116 @@ export default class ConfigManager {
|
|
|
979
979
|
default: 1.0,
|
|
980
980
|
valueSpace: 'bounded',
|
|
981
981
|
schemaValidated: true
|
|
982
|
+
},
|
|
983
|
+
includeBenchmark: {
|
|
984
|
+
cliOption: 'include-benchmark',
|
|
985
|
+
envVar: 'ML_INCLUDE_BENCHMARK',
|
|
986
|
+
configFile: true,
|
|
987
|
+
packageJson: false,
|
|
988
|
+
mcp: false,
|
|
989
|
+
promptable: true,
|
|
990
|
+
required: false,
|
|
991
|
+
default: false,
|
|
992
|
+
valueSpace: 'bounded'
|
|
993
|
+
},
|
|
994
|
+
benchmarkConcurrency: {
|
|
995
|
+
cliOption: 'benchmark-concurrency',
|
|
996
|
+
envVar: null,
|
|
997
|
+
configFile: true,
|
|
998
|
+
packageJson: false,
|
|
999
|
+
mcp: false,
|
|
1000
|
+
promptable: true,
|
|
1001
|
+
required: false,
|
|
1002
|
+
default: 10,
|
|
1003
|
+
valueSpace: 'bounded'
|
|
1004
|
+
},
|
|
1005
|
+
benchmarkInputTokensMean: {
|
|
1006
|
+
cliOption: 'benchmark-input-tokens',
|
|
1007
|
+
envVar: null,
|
|
1008
|
+
configFile: true,
|
|
1009
|
+
packageJson: false,
|
|
1010
|
+
mcp: false,
|
|
1011
|
+
promptable: true,
|
|
1012
|
+
required: false,
|
|
1013
|
+
default: 550,
|
|
1014
|
+
valueSpace: 'bounded'
|
|
1015
|
+
},
|
|
1016
|
+
benchmarkOutputTokensMean: {
|
|
1017
|
+
cliOption: 'benchmark-output-tokens',
|
|
1018
|
+
envVar: null,
|
|
1019
|
+
configFile: true,
|
|
1020
|
+
packageJson: false,
|
|
1021
|
+
mcp: false,
|
|
1022
|
+
promptable: true,
|
|
1023
|
+
required: false,
|
|
1024
|
+
default: 150,
|
|
1025
|
+
valueSpace: 'bounded'
|
|
1026
|
+
},
|
|
1027
|
+
benchmarkStreaming: {
|
|
1028
|
+
cliOption: 'benchmark-streaming',
|
|
1029
|
+
envVar: null,
|
|
1030
|
+
configFile: true,
|
|
1031
|
+
packageJson: false,
|
|
1032
|
+
mcp: false,
|
|
1033
|
+
promptable: true,
|
|
1034
|
+
required: false,
|
|
1035
|
+
default: true,
|
|
1036
|
+
valueSpace: 'bounded'
|
|
1037
|
+
},
|
|
1038
|
+
benchmarkRequestCount: {
|
|
1039
|
+
cliOption: 'benchmark-request-count',
|
|
1040
|
+
envVar: null,
|
|
1041
|
+
configFile: true,
|
|
1042
|
+
packageJson: false,
|
|
1043
|
+
mcp: false,
|
|
1044
|
+
promptable: true,
|
|
1045
|
+
required: false,
|
|
1046
|
+
default: null,
|
|
1047
|
+
valueSpace: 'bounded'
|
|
1048
|
+
},
|
|
1049
|
+
benchmarkS3OutputPath: {
|
|
1050
|
+
cliOption: 'benchmark-s3-output-path',
|
|
1051
|
+
envVar: 'ML_BENCHMARK_S3_OUTPUT_PATH',
|
|
1052
|
+
configFile: true,
|
|
1053
|
+
packageJson: false,
|
|
1054
|
+
mcp: false,
|
|
1055
|
+
promptable: true,
|
|
1056
|
+
required: false,
|
|
1057
|
+
default: null,
|
|
1058
|
+
valueSpace: 'bounded'
|
|
1059
|
+
},
|
|
1060
|
+
enableLora: {
|
|
1061
|
+
cliOption: 'enable-lora',
|
|
1062
|
+
envVar: null,
|
|
1063
|
+
configFile: true,
|
|
1064
|
+
packageJson: false,
|
|
1065
|
+
mcp: false,
|
|
1066
|
+
promptable: true,
|
|
1067
|
+
required: false,
|
|
1068
|
+
default: false,
|
|
1069
|
+
valueSpace: 'bounded'
|
|
1070
|
+
},
|
|
1071
|
+
maxLoras: {
|
|
1072
|
+
cliOption: 'max-loras',
|
|
1073
|
+
envVar: null,
|
|
1074
|
+
configFile: true,
|
|
1075
|
+
packageJson: false,
|
|
1076
|
+
mcp: false,
|
|
1077
|
+
promptable: true,
|
|
1078
|
+
required: false,
|
|
1079
|
+
default: 30,
|
|
1080
|
+
valueSpace: 'bounded'
|
|
1081
|
+
},
|
|
1082
|
+
maxLoraRank: {
|
|
1083
|
+
cliOption: 'max-lora-rank',
|
|
1084
|
+
envVar: null,
|
|
1085
|
+
configFile: true,
|
|
1086
|
+
packageJson: false,
|
|
1087
|
+
mcp: false,
|
|
1088
|
+
promptable: true,
|
|
1089
|
+
required: false,
|
|
1090
|
+
default: 64,
|
|
1091
|
+
valueSpace: 'bounded'
|
|
982
1092
|
}
|
|
983
1093
|
};
|
|
984
1094
|
}
|
|
@@ -1011,7 +1121,7 @@ export default class ConfigManager {
|
|
|
1011
1121
|
*/
|
|
1012
1122
|
_parseValue(parameter, value) {
|
|
1013
1123
|
// Handle boolean parameters
|
|
1014
|
-
if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts') {
|
|
1124
|
+
if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts' || parameter === 'includeBenchmark' || parameter === 'benchmarkStreaming' || parameter === 'enableLora') {
|
|
1015
1125
|
return value === true || value === 'true';
|
|
1016
1126
|
}
|
|
1017
1127
|
|
|
@@ -1847,6 +1957,12 @@ export default class ConfigManager {
|
|
|
1847
1957
|
if (param === 'instanceType' && finalConfig.deploymentTarget === 'hyperpod-eks' && !finalConfig.instanceType) {
|
|
1848
1958
|
return; // Skip validation only if truly missing for backward compat
|
|
1849
1959
|
}
|
|
1960
|
+
|
|
1961
|
+
// Special case: instanceType is not required when attaching to an existing endpoint
|
|
1962
|
+
// The instance type is inherited from the existing endpoint configuration
|
|
1963
|
+
if (param === 'instanceType' && finalConfig.existingEndpointName) {
|
|
1964
|
+
return; // Skip validation — instance is inherited from existing endpoint
|
|
1965
|
+
}
|
|
1850
1966
|
|
|
1851
1967
|
if (isEmpty) {
|
|
1852
1968
|
if (config.promptable) {
|
|
@@ -57,6 +57,22 @@ export default {
|
|
|
57
57
|
},
|
|
58
58
|
buildTarget: {
|
|
59
59
|
type: ['string', 'null']
|
|
60
|
+
},
|
|
61
|
+
icList: {
|
|
62
|
+
type: 'array',
|
|
63
|
+
items: {
|
|
64
|
+
type: 'object',
|
|
65
|
+
required: ['name'],
|
|
66
|
+
properties: {
|
|
67
|
+
name: { type: 'string', minLength: 1 },
|
|
68
|
+
image: { type: 'string' },
|
|
69
|
+
gpuCount: { type: 'integer', minimum: 0 },
|
|
70
|
+
copyCount: { type: 'integer', minimum: 1 },
|
|
71
|
+
isAdapter: { type: 'boolean' },
|
|
72
|
+
baseIcName: { type: 'string' },
|
|
73
|
+
artifactUrl: { type: 'string' }
|
|
74
|
+
}
|
|
75
|
+
}
|
|
60
76
|
}
|
|
61
77
|
}
|
|
62
78
|
},
|