@aws/ml-container-creator 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/bootstrap-stack.json +86 -7
- package/config/defaults.json +1 -1
- package/package.json +3 -1
- package/servers/instance-sizer/index.js +36 -2
- package/servers/instance-sizer/lib/instance-ranker.js +114 -10
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +15 -15
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +71 -0
- package/servers/lib/schemas/image-catalog.schema.json +9 -1
- package/src/app.js +77 -2
- package/src/lib/bootstrap-command-handler.js +96 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +78 -1
- package/src/lib/prompt-runner.js +96 -9
- package/src/lib/prompts.js +66 -4
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +125 -2
- package/templates/Dockerfile +10 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/benchmark +646 -0
- package/templates/do/clean +86 -0
- package/templates/do/config +26 -3
- package/templates/do/deploy +6 -1
- package/templates/do/register +8 -1
- package/templates/triton/Dockerfile +5 -0
package/servers/lib/catalogs/model-sizes.json
CHANGED
|
@@ -46,6 +46,33 @@
|
|
|
46
46
|
"minVramGb": 184,
|
|
47
47
|
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
|
|
48
48
|
},
|
|
49
|
+
"meta-llama/Llama-3.1-8B*": {
|
|
50
|
+
"parameterCount": 8030261248,
|
|
51
|
+
"defaultDtype": "bfloat16",
|
|
52
|
+
"architecture": "LlamaForCausalLM",
|
|
53
|
+
"maxPositionEmbeddings": 131072,
|
|
54
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
55
|
+
"minVramGb": 20,
|
|
56
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
57
|
+
},
|
|
58
|
+
"meta-llama/Llama-3.2-1B*": {
|
|
59
|
+
"parameterCount": 1235814400,
|
|
60
|
+
"defaultDtype": "bfloat16",
|
|
61
|
+
"architecture": "LlamaForCausalLM",
|
|
62
|
+
"maxPositionEmbeddings": 131072,
|
|
63
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
64
|
+
"minVramGb": 5,
|
|
65
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
66
|
+
},
|
|
67
|
+
"meta-llama/Llama-3.2-3B*": {
|
|
68
|
+
"parameterCount": 3212749824,
|
|
69
|
+
"defaultDtype": "bfloat16",
|
|
70
|
+
"architecture": "LlamaForCausalLM",
|
|
71
|
+
"maxPositionEmbeddings": 131072,
|
|
72
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
73
|
+
"minVramGb": 9,
|
|
74
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
75
|
+
},
|
|
49
76
|
"mistralai/Mistral-7B*": {
|
|
50
77
|
"parameterCount": 7241732096,
|
|
51
78
|
"defaultDtype": "bfloat16",
|
package/servers/lib/catalogs/models.json
CHANGED
|
@@ -532,6 +532,77 @@
|
|
|
532
532
|
"text-generation"
|
|
533
533
|
]
|
|
534
534
|
},
|
|
535
|
+
"meta-llama/Llama-3.1-8B*": {
|
|
536
|
+
"parameterCount": 8030261248,
|
|
537
|
+
"defaultDtype": "bfloat16",
|
|
538
|
+
"architecture": "LlamaForCausalLM",
|
|
539
|
+
"maxPositionEmbeddings": 131072,
|
|
540
|
+
"recommendedQuantizations": [
|
|
541
|
+
"awq",
|
|
542
|
+
"gptq"
|
|
543
|
+
],
|
|
544
|
+
"modelType": "transformer",
|
|
545
|
+
"tasks": [
|
|
546
|
+
"text-generation"
|
|
547
|
+
]
|
|
548
|
+
},
|
|
549
|
+
"meta-llama/Llama-3.1-70B*": {
|
|
550
|
+
"parameterCount": 70553706496,
|
|
551
|
+
"defaultDtype": "bfloat16",
|
|
552
|
+
"architecture": "LlamaForCausalLM",
|
|
553
|
+
"maxPositionEmbeddings": 131072,
|
|
554
|
+
"recommendedQuantizations": [
|
|
555
|
+
"awq",
|
|
556
|
+
"gptq"
|
|
557
|
+
],
|
|
558
|
+
"modelType": "transformer",
|
|
559
|
+
"tasks": [
|
|
560
|
+
"text-generation"
|
|
561
|
+
]
|
|
562
|
+
},
|
|
563
|
+
"meta-llama/Llama-3.1-405B*": {
|
|
564
|
+
"parameterCount": 405000000000,
|
|
565
|
+
"defaultDtype": "bfloat16",
|
|
566
|
+
"architecture": "LlamaForCausalLM",
|
|
567
|
+
"maxPositionEmbeddings": 131072,
|
|
568
|
+
"recommendedQuantizations": [
|
|
569
|
+
"awq",
|
|
570
|
+
"gptq",
|
|
571
|
+
"fp8"
|
|
572
|
+
],
|
|
573
|
+
"modelType": "transformer",
|
|
574
|
+
"tasks": [
|
|
575
|
+
"text-generation"
|
|
576
|
+
]
|
|
577
|
+
},
|
|
578
|
+
"meta-llama/Llama-3.2-1B*": {
|
|
579
|
+
"parameterCount": 1235814400,
|
|
580
|
+
"defaultDtype": "bfloat16",
|
|
581
|
+
"architecture": "LlamaForCausalLM",
|
|
582
|
+
"maxPositionEmbeddings": 131072,
|
|
583
|
+
"recommendedQuantizations": [
|
|
584
|
+
"awq",
|
|
585
|
+
"gptq"
|
|
586
|
+
],
|
|
587
|
+
"modelType": "transformer",
|
|
588
|
+
"tasks": [
|
|
589
|
+
"text-generation"
|
|
590
|
+
]
|
|
591
|
+
},
|
|
592
|
+
"meta-llama/Llama-3.2-3B*": {
|
|
593
|
+
"parameterCount": 3212749824,
|
|
594
|
+
"defaultDtype": "bfloat16",
|
|
595
|
+
"architecture": "LlamaForCausalLM",
|
|
596
|
+
"maxPositionEmbeddings": 131072,
|
|
597
|
+
"recommendedQuantizations": [
|
|
598
|
+
"awq",
|
|
599
|
+
"gptq"
|
|
600
|
+
],
|
|
601
|
+
"modelType": "transformer",
|
|
602
|
+
"tasks": [
|
|
603
|
+
"text-generation"
|
|
604
|
+
]
|
|
605
|
+
},
|
|
535
606
|
"Qwen/Qwen-7B*": {
|
|
536
607
|
"parameterCount": 7721324544,
|
|
537
608
|
"defaultDtype": "bfloat16",
|
package/servers/lib/schemas/image-catalog.schema.json
CHANGED
|
@@ -62,7 +62,15 @@
|
|
|
62
62
|
}
|
|
63
63
|
},
|
|
64
64
|
"inferenceAmiVersion": {
|
|
65
|
-
"type": "string"
|
|
65
|
+
"type": "string",
|
|
66
|
+
"enum": [
|
|
67
|
+
"al2023-ami-sagemaker-inference-cpu-0",
|
|
68
|
+
"al2-ami-sagemaker-inference-gpu-2",
|
|
69
|
+
"al2-ami-sagemaker-inference-gpu-2-1",
|
|
70
|
+
"al2-ami-sagemaker-inference-neuron-2",
|
|
71
|
+
"al2-ami-sagemaker-inference-gpu-3-1",
|
|
72
|
+
"al2023-ami-sagemaker-inference-gpu-4-1"
|
|
73
|
+
]
|
|
66
74
|
}
|
|
67
75
|
},
|
|
68
76
|
"additionalProperties": false
|
package/src/app.js
CHANGED
|
@@ -322,6 +322,17 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
322
322
|
ignorePatterns.push('**/sample_model/**');
|
|
323
323
|
}
|
|
324
324
|
|
|
325
|
+
// Exclude do/benchmark when benchmarking is not selected
|
|
326
|
+
if (!answers.includeBenchmark) {
|
|
327
|
+
ignorePatterns.push('**/do/benchmark');
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
// Exclude do/test when hosted-model-endpoint is not selected
|
|
331
|
+
const testTypes = answers.testTypes || [];
|
|
332
|
+
if (!testTypes.includes('hosted-model-endpoint')) {
|
|
333
|
+
ignorePatterns.push('**/do/test');
|
|
334
|
+
}
|
|
335
|
+
|
|
325
336
|
// Always exclude triton and diffusors source directories
|
|
326
337
|
ignorePatterns.push('**/triton/**');
|
|
327
338
|
ignorePatterns.push('**/diffusors/**');
|
|
@@ -417,6 +428,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
417
428
|
_copyFile(path.join(LIB_DIR, 'manifest-cli.js'), path.join(doLibDir, 'manifest-cli.js'));
|
|
418
429
|
_copyFile(path.join(LIB_DIR, 'asset-manager.js'), path.join(doLibDir, 'asset-manager.js'));
|
|
419
430
|
_copyFile(path.join(LIB_DIR, 'bootstrap-config.js'), path.join(doLibDir, 'bootstrap-config.js'));
|
|
431
|
+
|
|
432
|
+
// Generate .gitignore with benchmarks/ when benchmarking is enabled
|
|
433
|
+
if (answers.includeBenchmark) {
|
|
434
|
+
const gitignorePath = path.join(destDir, '.gitignore');
|
|
435
|
+
const gitignoreContent = '# Benchmark results (generated by do/benchmark)\nbenchmarks/\n';
|
|
436
|
+
if (fs.existsSync(gitignorePath)) {
|
|
437
|
+
const existing = fs.readFileSync(gitignorePath, 'utf8');
|
|
438
|
+
if (!existing.includes('benchmarks/')) {
|
|
439
|
+
fs.appendFileSync(gitignorePath, `\n${gitignoreContent}`);
|
|
440
|
+
}
|
|
441
|
+
} else {
|
|
442
|
+
fs.writeFileSync(gitignorePath, gitignoreContent);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
420
445
|
}
|
|
421
446
|
|
|
422
447
|
/**
|
|
@@ -529,7 +554,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
|
|
|
529
554
|
codebuildProjectName: null,
|
|
530
555
|
modelName: null,
|
|
531
556
|
modelFormat: null,
|
|
532
|
-
includeSampleModel: false,
|
|
557
|
+
includeSampleModel: true,
|
|
533
558
|
includeTesting: true,
|
|
534
559
|
testTypes: [],
|
|
535
560
|
buildTimestamp: new Date().toISOString(),
|
|
@@ -641,6 +666,55 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
|
|
|
641
666
|
// Silently continue - defaults are already set
|
|
642
667
|
}
|
|
643
668
|
}
|
|
669
|
+
|
|
670
|
+
// Populate baseImage from the catalog when still falsy (covers --skip-prompts and
|
|
671
|
+
// cases where MCP/CLI/config did not provide a base image).
|
|
672
|
+
// Precedence: MCP > CLI > config > catalog default (this block).
|
|
673
|
+
if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
|
|
674
|
+
const backendKey = answers.backend || answers.modelServer;
|
|
675
|
+
if (backendKey) {
|
|
676
|
+
const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
|
|
677
|
+
if (frameworkVersions) {
|
|
678
|
+
let resolvedConfig = null;
|
|
679
|
+
if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
|
|
680
|
+
resolvedConfig = frameworkVersions[answers.frameworkVersion];
|
|
681
|
+
} else {
|
|
682
|
+
// Fall back to latest version
|
|
683
|
+
const versions = Object.keys(frameworkVersions).sort((a, b) =>
|
|
684
|
+
b.localeCompare(a, undefined, { numeric: true })
|
|
685
|
+
);
|
|
686
|
+
if (versions.length > 0) {
|
|
687
|
+
resolvedConfig = frameworkVersions[versions[0]];
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
if (resolvedConfig?.baseImage) {
|
|
691
|
+
answers.baseImage = resolvedConfig.baseImage;
|
|
692
|
+
}
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Populate icGpuCount from instance catalog when not explicitly set.
|
|
698
|
+
// The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
|
|
699
|
+
// so it must always have a value for GPU deployments.
|
|
700
|
+
if (answers.icGpuCount == null && answers.instanceType) {
|
|
701
|
+
// Use gpuCount from instance-sizer recommendation if available
|
|
702
|
+
if (answers.gpuCount) {
|
|
703
|
+
answers.icGpuCount = answers.gpuCount;
|
|
704
|
+
} else {
|
|
705
|
+
// Look up from instances catalog
|
|
706
|
+
try {
|
|
707
|
+
const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
|
|
708
|
+
const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
|
|
709
|
+
const instanceInfo = catalogData?.catalog?.[answers.instanceType];
|
|
710
|
+
if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
|
|
711
|
+
answers.icGpuCount = instanceInfo.gpus;
|
|
712
|
+
}
|
|
713
|
+
} catch {
|
|
714
|
+
// Silently continue — template fallback handles missing value
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
}
|
|
644
718
|
}
|
|
645
719
|
|
|
646
720
|
/**
|
|
@@ -977,7 +1051,8 @@ function _setExecutablePermissions(destDir) {
|
|
|
977
1051
|
'do/submit',
|
|
978
1052
|
'do/register',
|
|
979
1053
|
'do/ci',
|
|
980
|
-
'do/manifest'
|
|
1054
|
+
'do/manifest',
|
|
1055
|
+
'do/benchmark'
|
|
981
1056
|
];
|
|
982
1057
|
|
|
983
1058
|
shellScripts.forEach(script => {
|
package/src/lib/bootstrap-command-handler.js
CHANGED
|
@@ -116,6 +116,11 @@ export default class BootstrapCommandHandler {
|
|
|
116
116
|
|
|
117
117
|
console.log('\n🚀 Bootstrap — Shared AWS Infrastructure Setup\n');
|
|
118
118
|
|
|
119
|
+
// Verify AWS CLI v2 is installed
|
|
120
|
+
if (!this._verifyCliV2()) {
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
123
|
+
|
|
119
124
|
// Determine bootstrap profile name
|
|
120
125
|
let profileName;
|
|
121
126
|
if (nonInteractive) {
|
|
@@ -194,6 +199,9 @@ export default class BootstrapCommandHandler {
|
|
|
194
199
|
if (stackOutputs.BatchS3BucketName) {
|
|
195
200
|
profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
196
201
|
}
|
|
202
|
+
if (stackOutputs.BenchmarkS3BucketName) {
|
|
203
|
+
profileData.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
|
|
204
|
+
}
|
|
197
205
|
|
|
198
206
|
console.log(' ✅ Bootstrap stack deployed successfully');
|
|
199
207
|
} catch (error) {
|
|
@@ -382,6 +390,9 @@ export default class BootstrapCommandHandler {
|
|
|
382
390
|
if (outputs.BatchS3BucketName) {
|
|
383
391
|
console.log(` ✅ S3 bucket (batch): ${outputs.BatchS3BucketName}`);
|
|
384
392
|
}
|
|
393
|
+
if (outputs.BenchmarkS3BucketName) {
|
|
394
|
+
console.log(` ✅ S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
|
|
395
|
+
}
|
|
385
396
|
if (outputs.StackVersion) {
|
|
386
397
|
console.log(` 📋 Stack version: ${outputs.StackVersion}`);
|
|
387
398
|
}
|
|
@@ -452,6 +463,20 @@ export default class BootstrapCommandHandler {
|
|
|
452
463
|
console.log(` ⚠️ S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
|
|
453
464
|
}
|
|
454
465
|
}
|
|
466
|
+
|
|
467
|
+
if (profile.config.benchmarkS3Bucket) {
|
|
468
|
+
try {
|
|
469
|
+
const benchmarkExists = this._resourceExists(
|
|
470
|
+
`s3api head-bucket --bucket ${profile.config.benchmarkS3Bucket}`,
|
|
471
|
+
profile.config.awsProfile
|
|
472
|
+
);
|
|
473
|
+
console.log(benchmarkExists
|
|
474
|
+
? ` ✅ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
|
|
475
|
+
: ` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
|
|
476
|
+
} catch {
|
|
477
|
+
console.log(` ⚠️ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
|
|
478
|
+
}
|
|
479
|
+
}
|
|
455
480
|
}
|
|
456
481
|
|
|
457
482
|
// Display deployed resources from manifest
|
|
@@ -1005,6 +1030,7 @@ export default class BootstrapCommandHandler {
|
|
|
1005
1030
|
if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
|
|
1006
1031
|
if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
|
|
1007
1032
|
if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
|
|
1033
|
+
if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
|
|
1008
1034
|
profileConfig.stackName = stackName;
|
|
1009
1035
|
|
|
1010
1036
|
console.log(' ✅ Bootstrap stack updated');
|
|
@@ -1278,11 +1304,28 @@ export default class BootstrapCommandHandler {
|
|
|
1278
1304
|
'sagemaker:DescribeEndpointConfig',
|
|
1279
1305
|
'sagemaker:DescribeModel',
|
|
1280
1306
|
'sagemaker:DescribeInferenceComponent',
|
|
1307
|
+
'sagemaker:ListInferenceComponents',
|
|
1281
1308
|
'sagemaker:InvokeEndpoint',
|
|
1282
1309
|
'sagemaker:InvokeEndpointAsync'
|
|
1283
1310
|
],
|
|
1284
1311
|
Resource: '*'
|
|
1285
1312
|
},
|
|
1313
|
+
{
|
|
1314
|
+
Sid: 'SageMakerBenchmarking',
|
|
1315
|
+
Effect: 'Allow',
|
|
1316
|
+
Action: [
|
|
1317
|
+
'sagemaker:CreateAIBenchmarkJob',
|
|
1318
|
+
'sagemaker:DescribeAIBenchmarkJob',
|
|
1319
|
+
'sagemaker:ListAIBenchmarkJobs',
|
|
1320
|
+
'sagemaker:StopAIBenchmarkJob',
|
|
1321
|
+
'sagemaker:DeleteAIBenchmarkJob',
|
|
1322
|
+
'sagemaker:CreateAIWorkloadConfig',
|
|
1323
|
+
'sagemaker:DescribeAIWorkloadConfig',
|
|
1324
|
+
'sagemaker:ListAIWorkloadConfigs',
|
|
1325
|
+
'sagemaker:DeleteAIWorkloadConfig'
|
|
1326
|
+
],
|
|
1327
|
+
Resource: '*'
|
|
1328
|
+
},
|
|
1286
1329
|
{
|
|
1287
1330
|
Sid: 'ECRPull',
|
|
1288
1331
|
Effect: 'Allow',
|
|
@@ -1329,6 +1372,29 @@ export default class BootstrapCommandHandler {
|
|
|
1329
1372
|
Effect: 'Allow',
|
|
1330
1373
|
Action: 'sns:Publish',
|
|
1331
1374
|
Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
|
|
1375
|
+
},
|
|
1376
|
+
{
|
|
1377
|
+
Sid: 'SecretsManagerBenchmark',
|
|
1378
|
+
Effect: 'Allow',
|
|
1379
|
+
Action: [
|
|
1380
|
+
'secretsmanager:CreateSecret',
|
|
1381
|
+
'secretsmanager:PutSecretValue',
|
|
1382
|
+
'secretsmanager:GetSecretValue',
|
|
1383
|
+
'secretsmanager:DescribeSecret'
|
|
1384
|
+
],
|
|
1385
|
+
Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
|
|
1386
|
+
},
|
|
1387
|
+
{
|
|
1388
|
+
Sid: 'QuotaAndAvailability',
|
|
1389
|
+
Effect: 'Allow',
|
|
1390
|
+
Action: [
|
|
1391
|
+
'service-quotas:GetServiceQuota',
|
|
1392
|
+
'service-quotas:ListServiceQuotas',
|
|
1393
|
+
'sagemaker:ListTrainingPlans',
|
|
1394
|
+
'sagemaker:DescribeTrainingPlan',
|
|
1395
|
+
'sagemaker:ListEndpoints'
|
|
1396
|
+
],
|
|
1397
|
+
Resource: '*'
|
|
1332
1398
|
}
|
|
1333
1399
|
]
|
|
1334
1400
|
};
|
|
@@ -1478,9 +1544,15 @@ export default class BootstrapCommandHandler {
|
|
|
1478
1544
|
|
|
1479
1545
|
/**
|
|
1480
1546
|
* Optionally create S3 buckets for async/batch deployments.
|
|
1547
|
+
* Always creates the benchmark S3 bucket (unconditional).
|
|
1481
1548
|
* @returns {Promise<object|null>} Bucket names or null if skipped
|
|
1482
1549
|
*/
|
|
1483
1550
|
async _setupS3Buckets() {
|
|
1551
|
+
// Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
|
|
1552
|
+
const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
|
|
1553
|
+
const tags = this._buildResourceTags();
|
|
1554
|
+
const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
|
|
1555
|
+
|
|
1484
1556
|
const { useS3 } = await this._promptFn([{
|
|
1485
1557
|
type: 'confirm',
|
|
1486
1558
|
name: 'useS3',
|
|
@@ -1489,17 +1561,16 @@ export default class BootstrapCommandHandler {
|
|
|
1489
1561
|
}]);
|
|
1490
1562
|
|
|
1491
1563
|
if (!useS3) {
|
|
1492
|
-
return null;
|
|
1564
|
+
return { benchmarkS3Bucket };
|
|
1493
1565
|
}
|
|
1494
1566
|
|
|
1495
1567
|
const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
|
|
1496
1568
|
const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
|
|
1497
1569
|
|
|
1498
|
-
const tags = this._buildResourceTags();
|
|
1499
1570
|
const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
|
|
1500
1571
|
const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
|
|
1501
1572
|
|
|
1502
|
-
return { asyncS3Bucket, batchS3Bucket };
|
|
1573
|
+
return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
|
|
1503
1574
|
}
|
|
1504
1575
|
|
|
1505
1576
|
/**
|
|
@@ -1556,6 +1627,28 @@ export default class BootstrapCommandHandler {
|
|
|
1556
1627
|
|
|
1557
1628
|
// ── AWS CLI helpers ─────────────────────────────────────────────
|
|
1558
1629
|
|
|
1630
|
+
/**
|
|
1631
|
+
* Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
|
|
1632
|
+
* Extracted as a method so tests can override it.
|
|
1633
|
+
* @returns {boolean}
|
|
1634
|
+
*/
|
|
1635
|
+
_verifyCliV2() {
|
|
1636
|
+
try {
|
|
1637
|
+
const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
|
|
1638
|
+
if (!versionOutput.includes('aws-cli/2')) {
|
|
1639
|
+
console.log(` ❌ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
|
|
1640
|
+
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
|
|
1641
|
+
console.log(' Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
|
|
1642
|
+
return false;
|
|
1643
|
+
}
|
|
1644
|
+
return true;
|
|
1645
|
+
} catch {
|
|
1646
|
+
console.log(' ❌ AWS CLI not found.');
|
|
1647
|
+
console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
|
|
1648
|
+
return false;
|
|
1649
|
+
}
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1559
1652
|
/**
|
|
1560
1653
|
* Execute an AWS CLI command and return parsed JSON output.
|
|
1561
1654
|
* @param {string} command - AWS CLI command (without 'aws' prefix)
|
package/src/lib/cli-handler.js
CHANGED
|
@@ -375,7 +375,7 @@ For more information, visit: https://github.com/awslabs/ml-container-creator
|
|
|
375
375
|
type: 'confirm',
|
|
376
376
|
name: 'includeSampleModel',
|
|
377
377
|
message: 'Include sample model:',
|
|
378
|
-
default: false,
|
|
378
|
+
default: true,
|
|
379
379
|
when: answers => answers.framework !== 'transformers'
|
|
380
380
|
},
|
|
381
381
|
{
|
|
@@ -515,7 +515,7 @@ ml-container-creator \\
|
|
|
515
515
|
'framework': 'sklearn',
|
|
516
516
|
'modelServer': 'flask',
|
|
517
517
|
'modelFormat': 'pkl',
|
|
518
|
-
'includeSampleModel': false,
|
|
518
|
+
'includeSampleModel': true,
|
|
519
519
|
'includeTesting': true,
|
|
520
520
|
'testTypes': ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
|
|
521
521
|
'buildTarget': 'codebuild',
|
package/src/lib/config-manager.js
CHANGED
|
@@ -979,6 +979,83 @@ export default class ConfigManager {
|
|
|
979
979
|
default: 1.0,
|
|
980
980
|
valueSpace: 'bounded',
|
|
981
981
|
schemaValidated: true
|
|
982
|
+
},
|
|
983
|
+
includeBenchmark: {
|
|
984
|
+
cliOption: 'include-benchmark',
|
|
985
|
+
envVar: 'ML_INCLUDE_BENCHMARK',
|
|
986
|
+
configFile: true,
|
|
987
|
+
packageJson: false,
|
|
988
|
+
mcp: false,
|
|
989
|
+
promptable: true,
|
|
990
|
+
required: false,
|
|
991
|
+
default: false,
|
|
992
|
+
valueSpace: 'bounded'
|
|
993
|
+
},
|
|
994
|
+
benchmarkConcurrency: {
|
|
995
|
+
cliOption: 'benchmark-concurrency',
|
|
996
|
+
envVar: null,
|
|
997
|
+
configFile: true,
|
|
998
|
+
packageJson: false,
|
|
999
|
+
mcp: false,
|
|
1000
|
+
promptable: true,
|
|
1001
|
+
required: false,
|
|
1002
|
+
default: 10,
|
|
1003
|
+
valueSpace: 'bounded'
|
|
1004
|
+
},
|
|
1005
|
+
benchmarkInputTokensMean: {
|
|
1006
|
+
cliOption: 'benchmark-input-tokens',
|
|
1007
|
+
envVar: null,
|
|
1008
|
+
configFile: true,
|
|
1009
|
+
packageJson: false,
|
|
1010
|
+
mcp: false,
|
|
1011
|
+
promptable: true,
|
|
1012
|
+
required: false,
|
|
1013
|
+
default: 550,
|
|
1014
|
+
valueSpace: 'bounded'
|
|
1015
|
+
},
|
|
1016
|
+
benchmarkOutputTokensMean: {
|
|
1017
|
+
cliOption: 'benchmark-output-tokens',
|
|
1018
|
+
envVar: null,
|
|
1019
|
+
configFile: true,
|
|
1020
|
+
packageJson: false,
|
|
1021
|
+
mcp: false,
|
|
1022
|
+
promptable: true,
|
|
1023
|
+
required: false,
|
|
1024
|
+
default: 150,
|
|
1025
|
+
valueSpace: 'bounded'
|
|
1026
|
+
},
|
|
1027
|
+
benchmarkStreaming: {
|
|
1028
|
+
cliOption: 'benchmark-streaming',
|
|
1029
|
+
envVar: null,
|
|
1030
|
+
configFile: true,
|
|
1031
|
+
packageJson: false,
|
|
1032
|
+
mcp: false,
|
|
1033
|
+
promptable: true,
|
|
1034
|
+
required: false,
|
|
1035
|
+
default: true,
|
|
1036
|
+
valueSpace: 'bounded'
|
|
1037
|
+
},
|
|
1038
|
+
benchmarkRequestCount: {
|
|
1039
|
+
cliOption: 'benchmark-request-count',
|
|
1040
|
+
envVar: null,
|
|
1041
|
+
configFile: true,
|
|
1042
|
+
packageJson: false,
|
|
1043
|
+
mcp: false,
|
|
1044
|
+
promptable: true,
|
|
1045
|
+
required: false,
|
|
1046
|
+
default: null,
|
|
1047
|
+
valueSpace: 'bounded'
|
|
1048
|
+
},
|
|
1049
|
+
benchmarkS3OutputPath: {
|
|
1050
|
+
cliOption: 'benchmark-s3-output-path',
|
|
1051
|
+
envVar: 'ML_BENCHMARK_S3_OUTPUT_PATH',
|
|
1052
|
+
configFile: true,
|
|
1053
|
+
packageJson: false,
|
|
1054
|
+
mcp: false,
|
|
1055
|
+
promptable: true,
|
|
1056
|
+
required: false,
|
|
1057
|
+
default: null,
|
|
1058
|
+
valueSpace: 'bounded'
|
|
982
1059
|
}
|
|
983
1060
|
};
|
|
984
1061
|
}
|
|
@@ -1011,7 +1088,7 @@ export default class ConfigManager {
|
|
|
1011
1088
|
*/
|
|
1012
1089
|
_parseValue(parameter, value) {
|
|
1013
1090
|
// Handle boolean parameters
|
|
1014
|
-
if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts') {
|
|
1091
|
+
if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts' || parameter === 'includeBenchmark' || parameter === 'benchmarkStreaming') {
|
|
1015
1092
|
return value === true || value === 'true';
|
|
1016
1093
|
}
|
|
1017
1094
|
|