@aws/ml-container-creator 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/config/bootstrap-stack.json +86 -7
- package/config/defaults.json +1 -1
- package/infra/ci-harness/buildspec.yml +60 -0
- package/package.json +3 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +42 -2
- package/servers/instance-sizer/lib/instance-ranker.js +114 -10
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +15 -15
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +71 -0
- package/servers/lib/schemas/image-catalog.schema.json +9 -1
- package/src/app.js +109 -3
- package/src/lib/bootstrap-command-handler.js +96 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +117 -1
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/prompt-runner.js +270 -12
- package/src/lib/prompts.js +288 -6
- package/src/lib/registry-command-handler.js +12 -0
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +125 -2
- package/templates/Dockerfile +22 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/serving.properties +14 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/adapter +1214 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +718 -0
- package/templates/do/clean +593 -17
- package/templates/do/config +49 -4
- package/templates/do/deploy +513 -362
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +119 -2
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
- package/templates/triton/Dockerfile +5 -0
package/src/lib/prompts.js
CHANGED
|
@@ -47,6 +47,69 @@ function loadInstanceTypeRegistry() {
|
|
|
47
47
|
|
|
48
48
|
const instanceTypeRegistry = loadInstanceTypeRegistry();
|
|
49
49
|
|
|
50
|
+
/**
 * Load the raw instance catalog for GPU/CUDA generation lookups.
 *
 * Reads the JSON file at `instancesCatalogPath` and returns its top-level
 * `catalog` member, which is used as a lookup table keyed by instance type.
 *
 * This is deliberately best-effort: a missing/unreadable file, malformed JSON,
 * or a `catalog` member that is not a plain object all yield an empty object,
 * so lookups against the result simply find nothing instead of crashing.
 *
 * @returns {Object} Catalog entries keyed by instance type, or `{}` when the
 *   catalog cannot be loaded or has an unexpected shape
 */
function loadInstanceCatalogRaw() {
    try {
        const parsed = JSON.parse(readFileSync(instancesCatalogPath, 'utf8'));
        const entries = parsed?.catalog;
        // Only a plain object is usable as a lookup table; arrays and
        // primitives (which are valid JSON) are treated as "no catalog".
        if (entries === null || typeof entries !== 'object' || Array.isArray(entries)) {
            return {};
        }
        return entries;
    } catch {
        // Best-effort load: fall back to an empty catalog on any failure.
        return {};
    }
}

const instanceCatalogRaw = loadInstanceCatalogRaw();
|
|
65
|
+
|
|
66
|
+
/**
 * Resolve the CUDA generation key for an instance type.
 *
 * The generation key is the catalog entry's `gpuArchitecture` value
 * (e.g., "Turing", "Ampere", "Hopper"); the original author notes that
 * instances in the same generation share AMI compatibility.
 *
 * @param {string} instanceType - e.g., "ml.g5.xlarge"
 * @returns {string|null} The generation key, or null when the instance type is
 *   not in the catalog, is not a CUDA accelerator, or has no `gpuArchitecture`
 */
function getInstanceCudaGeneration(instanceType) {
    const catalogEntry = instanceCatalogRaw[instanceType];

    // Unknown instance types and non-CUDA accelerators have no CUDA generation.
    if (catalogEntry?.acceleratorType !== 'cuda') {
        return null;
    }

    return catalogEntry.gpuArchitecture || null;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Partition instance types by CUDA generation, using the first
 * (highest-priority) entry as the reference generation.
 *
 * An instance type is kept when its generation equals the reference, or when
 * it has no generation at all. Because getInstanceCudaGeneration returns null
 * both for types missing from the catalog and for known non-CUDA types, both
 * kinds are kept rather than removed.
 *
 * When the first entry has no CUDA generation, no filtering is possible and
 * the input array itself (not a copy) is returned as `filtered`.
 *
 * @param {string[]} instanceTypes - Array of instance type strings
 * @returns {{ filtered: string[], generation: string|null, removed: string[] }}
 */
function filterByCudaGeneration(instanceTypes) {
    if (!instanceTypes || instanceTypes.length === 0) {
        return { filtered: [], generation: null, removed: [] };
    }

    // The first entry decides which generation everything else must match.
    const referenceGeneration = getInstanceCudaGeneration(instanceTypes[0]);
    if (!referenceGeneration) {
        return { filtered: instanceTypes, generation: null, removed: [] };
    }

    const kept = [];
    const dropped = [];
    for (const candidate of instanceTypes) {
        const candidateGeneration = getInstanceCudaGeneration(candidate);
        const isCompatible = candidateGeneration === null || candidateGeneration === referenceGeneration;
        (isCompatible ? kept : dropped).push(candidate);
    }

    return { filtered: kept, generation: referenceGeneration, removed: dropped };
}
|
|
112
|
+
|
|
50
113
|
/**
|
|
51
114
|
* Generate pseudo-randomized project name based on framework
|
|
52
115
|
* @param {string} framework - The ML framework
|
|
@@ -583,7 +646,7 @@ const modulePrompts = [
|
|
|
583
646
|
type: 'confirm',
|
|
584
647
|
name: 'includeSampleModel',
|
|
585
648
|
message: 'Include sample Abalone classifier?',
|
|
586
|
-
default:
|
|
649
|
+
default: true,
|
|
587
650
|
when: (answers) => {
|
|
588
651
|
const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
|
|
589
652
|
const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
|
|
@@ -622,7 +685,10 @@ const modulePrompts = [
|
|
|
622
685
|
|
|
623
686
|
// Transformers and diffusors support hosted endpoint tests and automated benchmarking; Triton LLM backends only support hosted endpoint tests
|
|
624
687
|
if (architecture === 'transformers') {
|
|
625
|
-
return ['hosted-model-endpoint'];
|
|
688
|
+
return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
|
|
689
|
+
}
|
|
690
|
+
if (architecture === 'diffusors') {
|
|
691
|
+
return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
|
|
626
692
|
}
|
|
627
693
|
if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
|
|
628
694
|
return ['hosted-model-endpoint'];
|
|
@@ -635,7 +701,10 @@ const modulePrompts = [
|
|
|
635
701
|
const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
|
|
636
702
|
|
|
637
703
|
if (architecture === 'transformers') {
|
|
638
|
-
return ['hosted-model-endpoint'];
|
|
704
|
+
return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
|
|
705
|
+
}
|
|
706
|
+
if (architecture === 'diffusors') {
|
|
707
|
+
return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
|
|
639
708
|
}
|
|
640
709
|
if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
|
|
641
710
|
return ['hosted-model-endpoint'];
|
|
@@ -692,15 +761,137 @@ const infraRegionAndTargetPrompts = [
|
|
|
692
761
|
}
|
|
693
762
|
];
|
|
694
763
|
|
|
764
|
+
// Sub-phase A2: Existing endpoint prompts (only when deploymentTarget === 'realtime-inference').
// Flow:
//   1. useExistingEndpoint        - ask whether to attach to an existing endpoint
//   2. existingEndpointName       - pick from answers._mcpEndpointChoices (when any) or 'custom'
//   3. customExistingEndpointName - free-text name, asked only when 'custom' was picked
const infraExistingEndpointPrompts = [
    {
        type: 'list',
        name: 'useExistingEndpoint',
        message: 'Deploy to an existing endpoint? (attach IC to running endpoint)',
        choices: [
            { name: 'No — create a new endpoint', value: 'no' },
            { name: 'Yes — attach to an existing endpoint', value: 'yes' }
        ],
        default: 'no',
        when: answers => answers.deploymentTarget === 'realtime-inference'
    },
    {
        type: 'list',
        name: 'existingEndpointName',
        message: 'Select endpoint:',
        choices: (answers) => {
            const mcpChoices = answers._mcpEndpointChoices || [];
            if (mcpChoices.length > 0) {
                // Discovered endpoints first, manual entry as the last option
                return [...mcpChoices, { name: 'Custom (enter manually)', value: 'custom' }];
            }
            return [{ name: 'Enter endpoint name manually', value: 'custom' }];
        },
        when: answers => answers.useExistingEndpoint === 'yes'
    },
    {
        type: 'input',
        name: 'customExistingEndpointName',
        message: 'Enter existing endpoint name:',
        validate: (input) => {
            const endpointName = typeof input === 'string' ? input.trim() : '';
            if (endpointName === '') {
                return 'Endpoint name is required';
            }
            // SageMaker endpoint names: alphanumerics and hyphens, starting and ending
            // with an alphanumeric character, at most 63 characters. Rejecting anything
            // else here catches typos before they surface as a deploy-time failure.
            if (endpointName.length > 63 || !/^[a-zA-Z0-9](-*[a-zA-Z0-9])*$/.test(endpointName)) {
                return 'Invalid endpoint name. Use up to 63 alphanumeric characters or hyphens, starting and ending with an alphanumeric character';
            }
            return true;
        },
        when: answers => answers.useExistingEndpoint === 'yes' && answers.existingEndpointName === 'custom'
    }
];
|
|
803
|
+
|
|
695
804
|
// Sub-phase B: Instance type (only when deploymentTarget === 'realtime-inference')
|
|
696
805
|
const infraInstancePrompts = [
|
|
806
|
+
// Multi-select prompt: shown when MCP sizer has choices AND deployment target is realtime-inference
|
|
807
|
+
// User can select 1-5 instances; selection count determines single-type vs instance-pools behavior
|
|
808
|
+
// Requirements: 6.4
|
|
809
|
+
{
|
|
810
|
+
type: 'checkbox',
|
|
811
|
+
name: 'instanceTypeSelections',
|
|
812
|
+
when: answers => answers.deploymentTarget === 'realtime-inference' &&
|
|
813
|
+
answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 1,
|
|
814
|
+
message: 'Select instance type(s) — select multiple for instance pools (priority = selection order, max 5):',
|
|
815
|
+
choices: (answers) => {
|
|
816
|
+
const mcpChoices = answers._mcpInstanceChoices || [];
|
|
817
|
+
// Show all compatible instances — CUDA generation filtering happens
|
|
818
|
+
// after selection to allow users to see all options and make informed choices.
|
|
819
|
+
// If they select instances from different generations, the post-selection
|
|
820
|
+
// filter (filterByCudaGeneration in prompt-runner.js) will warn and remove incompatible ones.
|
|
821
|
+
const choices = mcpChoices.map(instanceType => {
|
|
822
|
+
const entry = instanceCatalogRaw[instanceType];
|
|
823
|
+
const gpuInfo = entry ? `${entry.gpus} GPU${entry.gpus > 1 ? 's' : ''}, ${entry.gpuMemoryGb || '?'}GB` : '';
|
|
824
|
+
return {
|
|
825
|
+
name: gpuInfo ? `${instanceType} (${gpuInfo})` : instanceType,
|
|
826
|
+
value: instanceType,
|
|
827
|
+
short: instanceType
|
|
828
|
+
};
|
|
829
|
+
});
|
|
830
|
+
// Always include a "Custom Input" option at the end
|
|
831
|
+
choices.push({
|
|
832
|
+
name: 'Custom Input (enter one or comma-separated list)',
|
|
833
|
+
value: '__custom_input__',
|
|
834
|
+
short: 'Custom'
|
|
835
|
+
});
|
|
836
|
+
return choices;
|
|
837
|
+
},
|
|
838
|
+
validate: (input) => {
|
|
839
|
+
if (!input || input.length === 0) {
|
|
840
|
+
return 'Select at least one instance type';
|
|
841
|
+
}
|
|
842
|
+
if (input.length > 5) {
|
|
843
|
+
return 'Maximum 5 instance types allowed (API limit). Please deselect some.';
|
|
844
|
+
}
|
|
845
|
+
return true;
|
|
846
|
+
}
|
|
847
|
+
},
|
|
848
|
+
// Custom input prompt for multi-select: shown when user selects "Custom Input" in instanceTypeSelections
|
|
849
|
+
{
|
|
850
|
+
type: 'input',
|
|
851
|
+
name: 'customInstanceTypeSelections',
|
|
852
|
+
message: 'Enter instance type(s) — single for homogeneous, comma-separated for heterogeneous (e.g., ml.g5.xlarge or ml.g5.xlarge,ml.g5.2xlarge):',
|
|
853
|
+
when: answers => Array.isArray(answers.instanceTypeSelections) &&
|
|
854
|
+
answers.instanceTypeSelections.includes('__custom_input__'),
|
|
855
|
+
validate: (input) => {
|
|
856
|
+
if (!input || input.trim() === '') {
|
|
857
|
+
return 'At least one instance type is required';
|
|
858
|
+
}
|
|
859
|
+
const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
|
|
860
|
+
const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
|
|
861
|
+
if (instances.length === 0) {
|
|
862
|
+
return 'At least one instance type is required';
|
|
863
|
+
}
|
|
864
|
+
if (instances.length > 5) {
|
|
865
|
+
return 'Maximum 5 instance types allowed (API limit).';
|
|
866
|
+
}
|
|
867
|
+
for (const inst of instances) {
|
|
868
|
+
if (!instancePattern.test(inst)) {
|
|
869
|
+
return `Invalid instance type format: "${inst}". Expected format: ml.{family}.{size} (e.g., ml.g5.xlarge)`;
|
|
870
|
+
}
|
|
871
|
+
}
|
|
872
|
+
return true;
|
|
873
|
+
}
|
|
874
|
+
},
|
|
875
|
+
// Single-select prompt: shown when no MCP choices, or for non-realtime targets, or only 1 MCP choice
|
|
697
876
|
{
|
|
698
877
|
type: 'list',
|
|
699
878
|
name: 'instanceType',
|
|
700
|
-
when: answers =>
|
|
879
|
+
when: answers => {
|
|
880
|
+
// Skip if multi-select was shown (realtime with multiple MCP choices)
|
|
881
|
+
if (answers.deploymentTarget === 'realtime-inference' &&
|
|
882
|
+
answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 1) {
|
|
883
|
+
return false;
|
|
884
|
+
}
|
|
885
|
+
return answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks';
|
|
886
|
+
},
|
|
701
887
|
message: (answers) => {
|
|
702
888
|
const framework = answers.framework || answers.deploymentConfig?.split('-')[0];
|
|
703
|
-
|
|
889
|
+
|
|
890
|
+
// Skip table when MCP sizer already displayed annotated results
|
|
891
|
+
if (answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 0) {
|
|
892
|
+
return 'Select instance type:';
|
|
893
|
+
}
|
|
894
|
+
|
|
704
895
|
const table = new Table({
|
|
705
896
|
head: [
|
|
706
897
|
chalk.cyan('Instance Type'),
|
|
@@ -1110,6 +1301,91 @@ const baseImagePrompts = [
|
|
|
1110
1301
|
}
|
|
1111
1302
|
];
|
|
1112
1303
|
|
|
1304
|
+
/**
 * LoRA adapter prompts for multi-adapter serving configuration.
 *
 * `enableLora` is only asked when the architecture is `transformers` and the
 * backend is one of: vllm, sglang, djl-lmi, lmi, djl. Architecture and backend
 * come from `answers.architecture` / `answers.backend`, falling back to the
 * first segment / remaining segments of the hyphen-separated
 * `answers.deploymentConfig`.
 *
 * The numeric tuning prompts are only asked once `enableLora` is exactly true.
 * Requirements: 1.1, 1.2, 1.4
 */
const loraPrompts = [
    {
        type: 'confirm',
        name: 'enableLora',
        message: 'Enable LoRA adapter serving?',
        default: false,
        when: (answers) => {
            const arch = answers.architecture || answers.deploymentConfig?.split('-')[0];
            const server = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
            return arch === 'transformers' &&
                ['vllm', 'sglang', 'djl-lmi', 'lmi', 'djl'].includes(server);
        }
    },
    // Numeric follow-up prompts share the same shape and gating.
    ...[
        { name: 'maxLoras', message: 'Maximum concurrent LoRA adapters in GPU memory:', fallback: 30 },
        { name: 'maxLoraRank', message: 'Maximum LoRA rank:', fallback: 64 }
    ].map(({ name, message, fallback }) => ({
        type: 'number',
        name,
        message,
        default: fallback,
        when: (answers) => answers.enableLora === true
    }))
];
|
|
1338
|
+
|
|
1339
|
+
/**
 * Benchmark prompts for SageMaker AI Benchmarking (NVIDIA AIPerf).
 *
 * Every prompt here is asked only when `answers.includeBenchmark` is exactly
 * true.
 * NOTE(review): presumably `includeBenchmark` is set by the caller when
 * 'sagemaker-ai-automated-benchmarking' is selected in testTypes — confirm
 * against the prompt runner.
 * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
 */
const benchmarkPrompts = [
    // [type, name, message, default]
    ['number', 'benchmarkConcurrency', 'Concurrent requests for benchmark:', 10],
    ['number', 'benchmarkInputTokensMean', 'Mean input tokens per request:', 550],
    ['number', 'benchmarkOutputTokensMean', 'Mean output tokens per request:', 150],
    ['confirm', 'benchmarkStreaming', 'Enable streaming for benchmark?', true],
    ['input', 'benchmarkRequestCount', 'Total request count (leave empty for service default):', ''],
    ['input', 'benchmarkS3OutputPath', 'Benchmark results S3 path (leave empty for auto-created bucket):', '']
].map(([type, name, message, fallback]) => ({
    type,
    name,
    message,
    default: fallback,
    when: (answers) => answers.includeBenchmark === true
}));
|
|
1388
|
+
|
|
1113
1389
|
export {
|
|
1114
1390
|
deploymentConfigPrompts,
|
|
1115
1391
|
frameworkPrompts, // Deprecated: kept for backward compatibility
|
|
@@ -1123,8 +1399,11 @@ export {
|
|
|
1123
1399
|
hfTokenPrompts,
|
|
1124
1400
|
ngcApiKeyPrompts,
|
|
1125
1401
|
modulePrompts,
|
|
1402
|
+
loraPrompts,
|
|
1403
|
+
benchmarkPrompts,
|
|
1126
1404
|
infrastructurePrompts,
|
|
1127
1405
|
infraRegionAndTargetPrompts,
|
|
1406
|
+
infraExistingEndpointPrompts,
|
|
1128
1407
|
infraInstancePrompts,
|
|
1129
1408
|
infraAsyncPrompts,
|
|
1130
1409
|
infraBatchTransformPrompts,
|
|
@@ -1134,5 +1413,8 @@ export {
|
|
|
1134
1413
|
destinationPrompts,
|
|
1135
1414
|
baseImageSearchPrompts,
|
|
1136
1415
|
baseImagePrompts,
|
|
1137
|
-
formatImageChoices
|
|
1416
|
+
formatImageChoices,
|
|
1417
|
+
filterByCudaGeneration,
|
|
1418
|
+
getInstanceCudaGeneration,
|
|
1419
|
+
instanceCatalogRaw
|
|
1138
1420
|
};
|
|
@@ -150,6 +150,18 @@ export default class RegistryCommandHandler {
|
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
+
// Parse icList from JSON string if provided
|
|
154
|
+
const icListRaw = options.icList || options['ic-list'];
|
|
155
|
+
if (icListRaw) {
|
|
156
|
+
try {
|
|
157
|
+
entry.deployment.icList = typeof icListRaw === 'string'
|
|
158
|
+
? JSON.parse(icListRaw)
|
|
159
|
+
: icListRaw;
|
|
160
|
+
} catch (err) {
|
|
161
|
+
console.log(`Warning: Could not parse ic-list JSON: ${err.message}`);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
153
165
|
try {
|
|
154
166
|
const id = registry.add(entry);
|
|
155
167
|
console.log('✅ Deployment entry logged successfully.');
|
package/src/lib/schema-sync.js
CHANGED
|
@@ -188,6 +188,37 @@ export function loadServiceModel(serviceName, registryPath) {
|
|
|
188
188
|
return readFileSync(modelPath, 'utf8');
|
|
189
189
|
}
|
|
190
190
|
|
|
191
|
+
/**
 * Report whether the stored SageMaker service model knows about AI Benchmark
 * jobs, i.e. declares the `CreateAIBenchmarkJob` operation or the
 * `CreateAIBenchmarkJobRequest` shape.
 * Used to decide whether benchmark parameter validation can be performed.
 *
 * @param {string} [registryPath] - Override registry path; defaults to getRegistryPath()
 * @returns {{ available: boolean, reason?: string }} `available: true` when the
 *   operation or shape is present; otherwise `available: false` with a reason
 *   (model missing, model unparseable, or benchmark operations absent)
 */
export function hasBenchmarkShape(registryPath) {
    const rawModel = loadServiceModel('sagemaker', registryPath || getRegistryPath());
    if (!rawModel) {
        return { available: false, reason: 'SageMaker service model not found in registry' };
    }

    try {
        const serviceModel = JSON.parse(rawModel);

        // Either the operation itself or its input shape counts as support.
        const declaresBenchmark = Boolean(
            serviceModel.operations?.CreateAIBenchmarkJob ||
            serviceModel.shapes?.CreateAIBenchmarkJobRequest
        );

        return declaresBenchmark
            ? { available: true }
            : { available: false, reason: 'service model does not include AI Benchmark operations' };
    } catch {
        return { available: false, reason: 'Failed to parse SageMaker service model' };
    }
}
|
|
221
|
+
|
|
191
222
|
/**
|
|
192
223
|
* Store a service model in the registry.
|
|
193
224
|
* @param {string} serviceName - Service name (e.g., 'sagemaker')
|
|
@@ -65,7 +65,7 @@ export default class TemplateManager {
|
|
|
65
65
|
],
|
|
66
66
|
buildTargets: ['codebuild'],
|
|
67
67
|
deploymentTargets: ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
|
|
68
|
-
testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
|
|
68
|
+
testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'],
|
|
69
69
|
awsRegions: [
|
|
70
70
|
'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
|
|
71
71
|
'eu-west-1', 'eu-west-2', 'eu-central-1', 'eu-north-1',
|
|
@@ -134,6 +134,9 @@ export default class TemplateManager {
|
|
|
134
134
|
|
|
135
135
|
// Validate batch transform specific fields
|
|
136
136
|
this._validateBatchTransformConfig();
|
|
137
|
+
|
|
138
|
+
// Validate benchmark specific fields
|
|
139
|
+
this._validateBenchmarkConfig();
|
|
137
140
|
|
|
138
141
|
// Validate instance type format (ml.*.*) - only for realtime-inference
|
|
139
142
|
if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
|
|
@@ -297,6 +300,51 @@ export default class TemplateManager {
|
|
|
297
300
|
}
|
|
298
301
|
}
|
|
299
302
|
|
|
303
|
+
/**
|
|
304
|
+
* Validates benchmark configuration parameters
|
|
305
|
+
* @private
|
|
306
|
+
* @throws {Error} If benchmark configuration is invalid
|
|
307
|
+
*/
|
|
308
|
+
_validateBenchmarkConfig() {
|
|
309
|
+
if (!this.answers.includeBenchmark) return;
|
|
310
|
+
|
|
311
|
+
// Gate to supported architectures
|
|
312
|
+
const dc = this.answers.deploymentConfig;
|
|
313
|
+
const arch = dc ? dc.split('-')[0] : this.answers.architecture;
|
|
314
|
+
if (arch !== 'transformers' && arch !== 'diffusors') {
|
|
315
|
+
throw new Error('⚠️ Benchmarking is only supported with transformers and diffusors architectures.');
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
// Gate to supported deployment targets
|
|
319
|
+
if (this.answers.deploymentTarget === 'hyperpod-eks') {
|
|
320
|
+
throw new Error('⚠️ Benchmarking is only supported with managed-inference, async-inference, and batch-transform deployment targets');
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// Validate numeric parameters
|
|
324
|
+
if (this.answers.benchmarkConcurrency !== undefined) {
|
|
325
|
+
if (!Number.isInteger(this.answers.benchmarkConcurrency) || this.answers.benchmarkConcurrency < 1) {
|
|
326
|
+
throw new Error('⚠️ benchmarkConcurrency must be an integer >= 1');
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
if (this.answers.benchmarkInputTokensMean !== undefined) {
|
|
330
|
+
if (!Number.isInteger(this.answers.benchmarkInputTokensMean) || this.answers.benchmarkInputTokensMean < 1) {
|
|
331
|
+
throw new Error('⚠️ benchmarkInputTokensMean must be an integer >= 1');
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
if (this.answers.benchmarkOutputTokensMean !== undefined) {
|
|
335
|
+
if (!Number.isInteger(this.answers.benchmarkOutputTokensMean) || this.answers.benchmarkOutputTokensMean < 1) {
|
|
336
|
+
throw new Error('⚠️ benchmarkOutputTokensMean must be an integer >= 1');
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
// Validate S3 path format
|
|
341
|
+
if (this.answers.benchmarkS3OutputPath && this.answers.benchmarkS3OutputPath.trim() !== '') {
|
|
342
|
+
if (!this.answers.benchmarkS3OutputPath.startsWith('s3://')) {
|
|
343
|
+
throw new Error('⚠️ benchmarkS3OutputPath must start with "s3://". Example: s3://my-bucket/benchmark-results/');
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
300
348
|
/**
|
|
301
349
|
* Validates GPU instance type requirement for GPU-requiring backends.
|
|
302
350
|
* Called when deploymentConfig is present.
|
|
@@ -21,7 +21,7 @@ import SchemaValidationEngine from './schema-validation-engine.js';
|
|
|
21
21
|
import ServiceModelParser from './service-model-parser.js';
|
|
22
22
|
import CrossCuttingChecker from './cross-cutting-checker.js';
|
|
23
23
|
import HuggingFaceClient from './huggingface-client.js';
|
|
24
|
-
import { getRegistryPath, loadManifest } from './schema-sync.js';
|
|
24
|
+
import { getRegistryPath, loadManifest, hasBenchmarkShape } from './schema-sync.js';
|
|
25
25
|
|
|
26
26
|
const __filename = fileURLToPath(import.meta.url);
|
|
27
27
|
const __dirname = path.dirname(__filename);
|
|
@@ -52,6 +52,115 @@ export function parseDoConfig(configPath) {
|
|
|
52
52
|
return config;
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
/**
 * Validate benchmark parameters against service model constraints.
 * Called when the CreateAIBenchmarkJob shape is available in the synced schema.
 *
 * Validates (a key that is null, undefined or '' is treated as "not set" and skipped):
 * - BENCHMARK_CONCURRENCY: integer, min 1
 * - BENCHMARK_S3_OUTPUT_PATH: starts with s3://
 * - BENCHMARK_JOB_NAME: pattern ^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars
 * - BENCHMARK_INPUT_TOKENS_MEAN / BENCHMARK_OUTPUT_TOKENS_MEAN: integer, min 1
 *
 * String values containing shell expansions (`$(...)` or `${...}`) are not
 * checked, because their final value is only known when the script runs.
 *
 * Requirements: 8.1, 8.2, 8.3
 *
 * @param {Object} config - Parsed do/config values
 * @returns {Array<Object>} Array of validation findings; each has `severity`,
 *   `operation`, `fieldPath`, `constraint`, `invalidValue` and `remediationHint`
 */
export function validateBenchmarkParams(config) {
    const findings = [];

    const isSet = (value) => value !== null && value !== undefined && value !== '';
    const hasShellExpansion = (text) => text.includes('$(') || text.includes('${');

    // Shared check for the "integer >= 1" parameters.
    const checkPositiveInteger = (key, operation, fieldPath) => {
        const rawValue = config[key];
        if (!isSet(rawValue)) return;
        const numericValue = Number(rawValue);
        if (!Number.isInteger(numericValue) || numericValue < 1) {
            findings.push({
                severity: 'error',
                operation,
                fieldPath,
                constraint: 'integer >= 1',
                invalidValue: rawValue,
                remediationHint: `${key} must be a positive integer (>= 1)`
            });
        }
    };

    // Validate Concurrency (integer, min 1)
    checkPositiveInteger('BENCHMARK_CONCURRENCY', 'CreateAIBenchmarkJob', 'Concurrency');

    // Validate S3OutputLocation (string, starts with s3://)
    if (isSet(config.BENCHMARK_S3_OUTPUT_PATH)) {
        const s3Path = String(config.BENCHMARK_S3_OUTPUT_PATH);
        // Skip dynamic shell expressions (e.g., s3://...$(aws ...) or ${BUCKET}/...)
        if (!hasShellExpansion(s3Path) && !s3Path.startsWith('s3://')) {
            findings.push({
                severity: 'error',
                operation: 'CreateAIBenchmarkJob',
                fieldPath: 'OutputConfig.S3OutputLocation',
                constraint: 'must start with s3://',
                invalidValue: config.BENCHMARK_S3_OUTPUT_PATH,
                remediationHint: 'BENCHMARK_S3_OUTPUT_PATH must start with "s3://". Example: s3://my-bucket/benchmark-results/'
            });
        }
    }

    // Validate AIBenchmarkJobName pattern (^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars)
    if (isSet(config.BENCHMARK_JOB_NAME)) {
        const jobName = String(config.BENCHMARK_JOB_NAME);
        // Skip dynamic shell expressions
        if (!hasShellExpansion(jobName)) {
            const namePattern = /^[a-zA-Z0-9](-*[a-zA-Z0-9])*$/;
            // Length is reported first; the pattern is only checked for names that fit.
            if (jobName.length > 63) {
                findings.push({
                    severity: 'error',
                    operation: 'CreateAIBenchmarkJob',
                    fieldPath: 'AIBenchmarkJobName',
                    constraint: 'max 63 characters',
                    invalidValue: config.BENCHMARK_JOB_NAME,
                    remediationHint: 'AIBenchmarkJobName must be at most 63 characters'
                });
            } else if (!namePattern.test(jobName)) {
                findings.push({
                    severity: 'error',
                    operation: 'CreateAIBenchmarkJob',
                    fieldPath: 'AIBenchmarkJobName',
                    constraint: 'pattern: ^[a-zA-Z0-9](-*[a-zA-Z0-9])*',
                    invalidValue: config.BENCHMARK_JOB_NAME,
                    remediationHint: 'AIBenchmarkJobName must start with alphanumeric and contain only alphanumeric characters and hyphens'
                });
            }
        }
    }

    // Validate input tokens mean (integer, min 1)
    checkPositiveInteger(
        'BENCHMARK_INPUT_TOKENS_MEAN',
        'CreateAIWorkloadConfig',
        'WorkloadSpec.parameters.prompt_input_tokens_mean'
    );

    // Validate output tokens mean (integer, min 1)
    checkPositiveInteger(
        'BENCHMARK_OUTPUT_TOKENS_MEAN',
        'CreateAIWorkloadConfig',
        'WorkloadSpec.parameters.output_tokens_mean'
    );

    return findings;
}
|
|
163
|
+
|
|
55
164
|
/**
|
|
56
165
|
* Run the full validation pipeline.
|
|
57
166
|
*
|
|
@@ -171,6 +280,20 @@ export async function run(options = {}) {
|
|
|
171
280
|
}
|
|
172
281
|
}
|
|
173
282
|
|
|
283
|
+
// Run benchmark parameter validation (Requirements 8.1, 8.2, 8.3)
|
|
284
|
+
if (config.BENCHMARK_CONCURRENCY || config.BENCHMARK_INPUT_TOKENS_MEAN ||
|
|
285
|
+
config.BENCHMARK_OUTPUT_TOKENS_MEAN || config.BENCHMARK_S3_OUTPUT_PATH) {
|
|
286
|
+
const benchmarkCheck = hasBenchmarkShape(registryPath);
|
|
287
|
+
if (benchmarkCheck.available) {
|
|
288
|
+
const benchmarkFindings = validateBenchmarkParams(config);
|
|
289
|
+
for (const finding of benchmarkFindings) {
|
|
290
|
+
report.addFinding(finding);
|
|
291
|
+
}
|
|
292
|
+
} else {
|
|
293
|
+
console.log('⚠️ Benchmark validation skipped: service model does not include AI Benchmark operations. Run `bootstrap sync-schemas` to update.');
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
174
297
|
const summary = report.getSummary();
|
|
175
298
|
|
|
176
299
|
// Load manifest for version info
|
|
@@ -213,4 +336,4 @@ export async function run(options = {}) {
|
|
|
213
336
|
return exitCode;
|
|
214
337
|
}
|
|
215
338
|
|
|
216
|
-
export default { run, parseDoConfig };
|
|
339
|
+
export default { run, parseDoConfig, validateBenchmarkParams };
|