@aws/ml-container-creator 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +86 -7
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/buildspec.yml +60 -0
  5. package/package.json +3 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +42 -2
  8. package/servers/instance-sizer/lib/instance-ranker.js +114 -10
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +15 -15
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +71 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +9 -1
  16. package/src/app.js +109 -3
  17. package/src/lib/bootstrap-command-handler.js +96 -3
  18. package/src/lib/cli-handler.js +2 -2
  19. package/src/lib/config-manager.js +117 -1
  20. package/src/lib/deployment-entry-schema.js +16 -0
  21. package/src/lib/prompt-runner.js +270 -12
  22. package/src/lib/prompts.js +288 -6
  23. package/src/lib/registry-command-handler.js +12 -0
  24. package/src/lib/schema-sync.js +31 -0
  25. package/src/lib/template-manager.js +49 -1
  26. package/src/lib/validate-runner.js +125 -2
  27. package/templates/Dockerfile +22 -2
  28. package/templates/code/cuda_compat.sh +22 -0
  29. package/templates/code/serve +3 -0
  30. package/templates/code/serving.properties +14 -0
  31. package/templates/code/start_server.sh +3 -0
  32. package/templates/diffusors/Dockerfile +2 -1
  33. package/templates/diffusors/serve +3 -0
  34. package/templates/do/README.md +33 -0
  35. package/templates/do/adapter +1214 -0
  36. package/templates/do/adapters/.gitkeep +2 -0
  37. package/templates/do/add-ic +130 -0
  38. package/templates/do/benchmark +718 -0
  39. package/templates/do/clean +593 -17
  40. package/templates/do/config +49 -4
  41. package/templates/do/deploy +513 -362
  42. package/templates/do/ic/default.conf +32 -0
  43. package/templates/do/lib/endpoint-config.sh +216 -0
  44. package/templates/do/lib/inference-component.sh +167 -0
  45. package/templates/do/lib/secrets.sh +44 -0
  46. package/templates/do/lib/wait.sh +131 -0
  47. package/templates/do/logs +107 -27
  48. package/templates/do/optimize +528 -0
  49. package/templates/do/register +119 -2
  50. package/templates/do/status +337 -0
  51. package/templates/do/test +80 -28
  52. package/templates/triton/Dockerfile +5 -0
@@ -47,6 +47,69 @@ function loadInstanceTypeRegistry() {
47
47
 
48
48
  const instanceTypeRegistry = loadInstanceTypeRegistry();
49
49
 
50
+ /**
51
+ * Load the raw instance catalog for GPU/CUDA generation lookups.
52
+ * Returns the full catalog entries keyed by instance type.
53
+ */
54
+ function loadInstanceCatalogRaw() {
55
+ try {
56
+ const raw = readFileSync(instancesCatalogPath, 'utf8');
57
+ const catalog = JSON.parse(raw);
58
+ return catalog?.catalog || {};
59
+ } catch (error) {
60
+ return {};
61
+ }
62
+ }
63
+
64
+ const instanceCatalogRaw = loadInstanceCatalogRaw();
65
+
66
+ /**
67
+ * Get the CUDA generation key for an instance type.
68
+ * Uses gpuArchitecture as the generation grouping (e.g., "Turing", "Ampere", "Hopper").
69
+ * Instances in the same generation share AMI compatibility.
70
+ * @param {string} instanceType - e.g., "ml.g5.xlarge"
71
+ * @returns {string|null} Generation key or null if not found/not GPU
72
+ */
73
+ function getInstanceCudaGeneration(instanceType) {
74
+ const entry = instanceCatalogRaw[instanceType];
75
+ if (!entry) return null;
76
+ if (entry.acceleratorType !== 'cuda') return null;
77
+ return entry.gpuArchitecture || null;
78
+ }
79
+
80
+ /**
81
+ * Filter instance choices to only include instances from the same CUDA generation
82
+ * as the first (highest-priority) instance in the list.
83
+ * @param {string[]} instanceTypes - Array of instance type strings
84
+ * @returns {{ filtered: string[], generation: string|null, removed: string[] }}
85
+ */
86
+ function filterByCudaGeneration(instanceTypes) {
87
+ if (!instanceTypes || instanceTypes.length === 0) {
88
+ return { filtered: [], generation: null, removed: [] };
89
+ }
90
+
91
+ // Find the generation of the first instance
92
+ const firstGen = getInstanceCudaGeneration(instanceTypes[0]);
93
+ if (!firstGen) {
94
+ // First instance not in catalog or not CUDA — return all (can't filter)
95
+ return { filtered: instanceTypes, generation: null, removed: [] };
96
+ }
97
+
98
+ const filtered = [];
99
+ const removed = [];
100
+ for (const it of instanceTypes) {
101
+ const gen = getInstanceCudaGeneration(it);
102
+ // Keep if same generation, or if not in catalog (don't block unknown types)
103
+ if (gen === firstGen || gen === null) {
104
+ filtered.push(it);
105
+ } else {
106
+ removed.push(it);
107
+ }
108
+ }
109
+
110
+ return { filtered, generation: firstGen, removed };
111
+ }
112
+
50
113
  /**
51
114
  * Generate pseudo-randomized project name based on framework
52
115
  * @param {string} framework - The ML framework
@@ -583,7 +646,7 @@ const modulePrompts = [
583
646
  type: 'confirm',
584
647
  name: 'includeSampleModel',
585
648
  message: 'Include sample Abalone classifier?',
586
- default: false,
649
+ default: true,
587
650
  when: (answers) => {
588
651
  const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
589
652
  const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
@@ -622,7 +685,10 @@ const modulePrompts = [
622
685
 
623
686
  // Transformers and Triton LLM backends only support hosted endpoint tests
624
687
  if (architecture === 'transformers') {
625
- return ['hosted-model-endpoint'];
688
+ return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
689
+ }
690
+ if (architecture === 'diffusors') {
691
+ return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
626
692
  }
627
693
  if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
628
694
  return ['hosted-model-endpoint'];
@@ -635,7 +701,10 @@ const modulePrompts = [
635
701
  const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
636
702
 
637
703
  if (architecture === 'transformers') {
638
- return ['hosted-model-endpoint'];
704
+ return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
705
+ }
706
+ if (architecture === 'diffusors') {
707
+ return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
639
708
  }
640
709
  if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
641
710
  return ['hosted-model-endpoint'];
@@ -692,15 +761,137 @@ const infraRegionAndTargetPrompts = [
692
761
  }
693
762
  ];
694
763
 
764
+ // Sub-phase A2: Existing endpoint prompt (only when deploymentTarget === 'realtime-inference')
765
+ const infraExistingEndpointPrompts = [
766
+ {
767
+ type: 'list',
768
+ name: 'useExistingEndpoint',
769
+ message: 'Deploy to an existing endpoint? (attach IC to running endpoint)',
770
+ choices: [
771
+ { name: 'No — create a new endpoint', value: 'no' },
772
+ { name: 'Yes — attach to an existing endpoint', value: 'yes' }
773
+ ],
774
+ default: 'no',
775
+ when: answers => answers.deploymentTarget === 'realtime-inference'
776
+ },
777
+ {
778
+ type: 'list',
779
+ name: 'existingEndpointName',
780
+ message: 'Select endpoint:',
781
+ choices: (answers) => {
782
+ const mcpChoices = answers._mcpEndpointChoices || [];
783
+ if (mcpChoices.length > 0) {
784
+ return [...mcpChoices, { name: 'Custom (enter manually)', value: 'custom' }];
785
+ }
786
+ return [{ name: 'Enter endpoint name manually', value: 'custom' }];
787
+ },
788
+ when: answers => answers.useExistingEndpoint === 'yes'
789
+ },
790
+ {
791
+ type: 'input',
792
+ name: 'customExistingEndpointName',
793
+ message: 'Enter existing endpoint name:',
794
+ validate: (input) => {
795
+ if (!input || input.trim() === '') {
796
+ return 'Endpoint name is required';
797
+ }
798
+ return true;
799
+ },
800
+ when: answers => answers.useExistingEndpoint === 'yes' && answers.existingEndpointName === 'custom'
801
+ }
802
+ ];
803
+
695
804
  // Sub-phase B: Instance type (only when deploymentTarget === 'realtime-inference')
696
805
  const infraInstancePrompts = [
806
+ // Multi-select prompt: shown when MCP sizer has choices AND deployment target is realtime-inference
807
+ // User can select 1-5 instances; selection count determines single-type vs instance-pools behavior
808
+ // Requirements: 6.4
809
+ {
810
+ type: 'checkbox',
811
+ name: 'instanceTypeSelections',
812
+ when: answers => answers.deploymentTarget === 'realtime-inference' &&
813
+ answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 1,
814
+ message: 'Select instance type(s) — select multiple for instance pools (priority = selection order, max 5):',
815
+ choices: (answers) => {
816
+ const mcpChoices = answers._mcpInstanceChoices || [];
817
+ // Show all compatible instances — CUDA generation filtering happens
818
+ // after selection to allow users to see all options and make informed choices.
819
+ // If they select instances from different generations, the post-selection
820
+ // filter (filterByCudaGeneration in prompt-runner.js) will warn and remove incompatible ones.
821
+ const choices = mcpChoices.map(instanceType => {
822
+ const entry = instanceCatalogRaw[instanceType];
823
+ const gpuInfo = entry ? `${entry.gpus} GPU${entry.gpus > 1 ? 's' : ''}, ${entry.gpuMemoryGb || '?'}GB` : '';
824
+ return {
825
+ name: gpuInfo ? `${instanceType} (${gpuInfo})` : instanceType,
826
+ value: instanceType,
827
+ short: instanceType
828
+ };
829
+ });
830
+ // Always include a "Custom Input" option at the end
831
+ choices.push({
832
+ name: 'Custom Input (enter one or comma-separated list)',
833
+ value: '__custom_input__',
834
+ short: 'Custom'
835
+ });
836
+ return choices;
837
+ },
838
+ validate: (input) => {
839
+ if (!input || input.length === 0) {
840
+ return 'Select at least one instance type';
841
+ }
842
+ if (input.length > 5) {
843
+ return 'Maximum 5 instance types allowed (API limit). Please deselect some.';
844
+ }
845
+ return true;
846
+ }
847
+ },
848
+ // Custom input prompt for multi-select: shown when user selects "Custom Input" in instanceTypeSelections
849
+ {
850
+ type: 'input',
851
+ name: 'customInstanceTypeSelections',
852
+ message: 'Enter instance type(s) — single for homogeneous, comma-separated for heterogeneous (e.g., ml.g5.xlarge or ml.g5.xlarge,ml.g5.2xlarge):',
853
+ when: answers => Array.isArray(answers.instanceTypeSelections) &&
854
+ answers.instanceTypeSelections.includes('__custom_input__'),
855
+ validate: (input) => {
856
+ if (!input || input.trim() === '') {
857
+ return 'At least one instance type is required';
858
+ }
859
+ const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
860
+ const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
861
+ if (instances.length === 0) {
862
+ return 'At least one instance type is required';
863
+ }
864
+ if (instances.length > 5) {
865
+ return 'Maximum 5 instance types allowed (API limit).';
866
+ }
867
+ for (const inst of instances) {
868
+ if (!instancePattern.test(inst)) {
869
+ return `Invalid instance type format: "${inst}". Expected format: ml.{family}.{size} (e.g., ml.g5.xlarge)`;
870
+ }
871
+ }
872
+ return true;
873
+ }
874
+ },
875
+ // Single-select prompt: shown when no MCP choices, or for non-realtime targets, or only 1 MCP choice
697
876
  {
698
877
  type: 'list',
699
878
  name: 'instanceType',
700
- when: answers => answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks',
879
+ when: answers => {
880
+ // Skip if multi-select was shown (realtime with multiple MCP choices)
881
+ if (answers.deploymentTarget === 'realtime-inference' &&
882
+ answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 1) {
883
+ return false;
884
+ }
885
+ return answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks';
886
+ },
701
887
  message: (answers) => {
702
888
  const framework = answers.framework || answers.deploymentConfig?.split('-')[0];
703
-
889
+
890
+ // Skip table when MCP sizer already displayed annotated results
891
+ if (answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 0) {
892
+ return 'Select instance type:';
893
+ }
894
+
704
895
  const table = new Table({
705
896
  head: [
706
897
  chalk.cyan('Instance Type'),
@@ -1110,6 +1301,91 @@ const baseImagePrompts = [
1110
1301
  }
1111
1302
  ];
1112
1303
 
1304
+ /**
1305
+ * LoRA adapter prompts for multi-adapter serving configuration.
1306
+ * Only shown when architecture is transformers AND model server is vllm, sglang, or djl-lmi.
1307
+ * Requirements: 1.1, 1.2, 1.4
1308
+ */
1309
+ const loraPrompts = [
1310
+ {
1311
+ type: 'confirm',
1312
+ name: 'enableLora',
1313
+ message: 'Enable LoRA adapter serving?',
1314
+ default: false,
1315
+ when: (answers) => {
1316
+ const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
1317
+ const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
1318
+ if (architecture !== 'transformers') return false;
1319
+ const loraCapableServers = ['vllm', 'sglang', 'djl-lmi', 'lmi', 'djl'];
1320
+ return loraCapableServers.includes(backend);
1321
+ }
1322
+ },
1323
+ {
1324
+ type: 'number',
1325
+ name: 'maxLoras',
1326
+ message: 'Maximum concurrent LoRA adapters in GPU memory:',
1327
+ default: 30,
1328
+ when: (answers) => answers.enableLora === true
1329
+ },
1330
+ {
1331
+ type: 'number',
1332
+ name: 'maxLoraRank',
1333
+ message: 'Maximum LoRA rank:',
1334
+ default: 64,
1335
+ when: (answers) => answers.enableLora === true
1336
+ }
1337
+ ];
1338
+
1339
+ /**
1340
+ * Benchmark prompts for SageMaker AI Benchmarking (NVIDIA AIPerf)
1341
+ * Sub-prompts shown when 'sagemaker-ai-automated-benchmarking' is selected in testTypes.
1342
+ * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
1343
+ */
1344
+ const benchmarkPrompts = [
1345
+ {
1346
+ type: 'number',
1347
+ name: 'benchmarkConcurrency',
1348
+ message: 'Concurrent requests for benchmark:',
1349
+ default: 10,
1350
+ when: (answers) => answers.includeBenchmark === true
1351
+ },
1352
+ {
1353
+ type: 'number',
1354
+ name: 'benchmarkInputTokensMean',
1355
+ message: 'Mean input tokens per request:',
1356
+ default: 550,
1357
+ when: (answers) => answers.includeBenchmark === true
1358
+ },
1359
+ {
1360
+ type: 'number',
1361
+ name: 'benchmarkOutputTokensMean',
1362
+ message: 'Mean output tokens per request:',
1363
+ default: 150,
1364
+ when: (answers) => answers.includeBenchmark === true
1365
+ },
1366
+ {
1367
+ type: 'confirm',
1368
+ name: 'benchmarkStreaming',
1369
+ message: 'Enable streaming for benchmark?',
1370
+ default: true,
1371
+ when: (answers) => answers.includeBenchmark === true
1372
+ },
1373
+ {
1374
+ type: 'input',
1375
+ name: 'benchmarkRequestCount',
1376
+ message: 'Total request count (leave empty for service default):',
1377
+ default: '',
1378
+ when: (answers) => answers.includeBenchmark === true
1379
+ },
1380
+ {
1381
+ type: 'input',
1382
+ name: 'benchmarkS3OutputPath',
1383
+ message: 'Benchmark results S3 path (leave empty for auto-created bucket):',
1384
+ default: '',
1385
+ when: (answers) => answers.includeBenchmark === true
1386
+ }
1387
+ ];
1388
+
1113
1389
  export {
1114
1390
  deploymentConfigPrompts,
1115
1391
  frameworkPrompts, // Deprecated: kept for backward compatibility
@@ -1123,8 +1399,11 @@ export {
1123
1399
  hfTokenPrompts,
1124
1400
  ngcApiKeyPrompts,
1125
1401
  modulePrompts,
1402
+ loraPrompts,
1403
+ benchmarkPrompts,
1126
1404
  infrastructurePrompts,
1127
1405
  infraRegionAndTargetPrompts,
1406
+ infraExistingEndpointPrompts,
1128
1407
  infraInstancePrompts,
1129
1408
  infraAsyncPrompts,
1130
1409
  infraBatchTransformPrompts,
@@ -1134,5 +1413,8 @@ export {
1134
1413
  destinationPrompts,
1135
1414
  baseImageSearchPrompts,
1136
1415
  baseImagePrompts,
1137
- formatImageChoices
1416
+ formatImageChoices,
1417
+ filterByCudaGeneration,
1418
+ getInstanceCudaGeneration,
1419
+ instanceCatalogRaw
1138
1420
  };
@@ -150,6 +150,18 @@ export default class RegistryCommandHandler {
150
150
  }
151
151
  }
152
152
 
153
+ // Parse icList from JSON string if provided
154
+ const icListRaw = options.icList || options['ic-list'];
155
+ if (icListRaw) {
156
+ try {
157
+ entry.deployment.icList = typeof icListRaw === 'string'
158
+ ? JSON.parse(icListRaw)
159
+ : icListRaw;
160
+ } catch (err) {
161
+ console.log(`Warning: Could not parse ic-list JSON: ${err.message}`);
162
+ }
163
+ }
164
+
153
165
  try {
154
166
  const id = registry.add(entry);
155
167
  console.log('✅ Deployment entry logged successfully.');
@@ -188,6 +188,37 @@ export function loadServiceModel(serviceName, registryPath) {
188
188
  return readFileSync(modelPath, 'utf8');
189
189
  }
190
190
 
191
+ /**
192
+ * Check whether the SageMaker service model includes the CreateAIBenchmarkJob operation shape.
193
+ * Used to determine if benchmark parameter validation can be performed.
194
+ *
195
+ * @param {string} [registryPath] - Override registry path
196
+ * @returns {{ available: boolean, reason?: string }}
197
+ */
198
+ export function hasBenchmarkShape(registryPath) {
199
+ const regPath = registryPath || getRegistryPath();
200
+ const modelContent = loadServiceModel('sagemaker', regPath);
201
+
202
+ if (!modelContent) {
203
+ return { available: false, reason: 'SageMaker service model not found in registry' };
204
+ }
205
+
206
+ try {
207
+ const model = JSON.parse(modelContent);
208
+ const operations = model.operations || {};
209
+ const shapes = model.shapes || {};
210
+
211
+ // Check for the CreateAIBenchmarkJob operation or its input shape
212
+ if (operations.CreateAIBenchmarkJob || shapes.CreateAIBenchmarkJobRequest) {
213
+ return { available: true };
214
+ }
215
+
216
+ return { available: false, reason: 'service model does not include AI Benchmark operations' };
217
+ } catch {
218
+ return { available: false, reason: 'Failed to parse SageMaker service model' };
219
+ }
220
+ }
221
+
191
222
  /**
192
223
  * Store a service model in the registry.
193
224
  * @param {string} serviceName - Service name (e.g., 'sagemaker')
@@ -65,7 +65,7 @@ export default class TemplateManager {
65
65
  ],
66
66
  buildTargets: ['codebuild'],
67
67
  deploymentTargets: ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
68
- testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
68
+ testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'],
69
69
  awsRegions: [
70
70
  'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
71
71
  'eu-west-1', 'eu-west-2', 'eu-central-1', 'eu-north-1',
@@ -134,6 +134,9 @@ export default class TemplateManager {
134
134
 
135
135
  // Validate batch transform specific fields
136
136
  this._validateBatchTransformConfig();
137
+
138
+ // Validate benchmark specific fields
139
+ this._validateBenchmarkConfig();
137
140
 
138
141
  // Validate instance type format (ml.*.*) - only for realtime-inference
139
142
  if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
@@ -297,6 +300,51 @@ export default class TemplateManager {
297
300
  }
298
301
  }
299
302
 
303
+ /**
304
+ * Validates benchmark configuration parameters
305
+ * @private
306
+ * @throws {Error} If benchmark configuration is invalid
307
+ */
308
+ _validateBenchmarkConfig() {
309
+ if (!this.answers.includeBenchmark) return;
310
+
311
+ // Gate to supported architectures
312
+ const dc = this.answers.deploymentConfig;
313
+ const arch = dc ? dc.split('-')[0] : this.answers.architecture;
314
+ if (arch !== 'transformers' && arch !== 'diffusors') {
315
+ throw new Error('⚠️ Benchmarking is only supported with transformers and diffusors architectures.');
316
+ }
317
+
318
+ // Gate to supported deployment targets
319
+ if (this.answers.deploymentTarget === 'hyperpod-eks') {
320
+ throw new Error('⚠️ Benchmarking is only supported with managed-inference, async-inference, and batch-transform deployment targets');
321
+ }
322
+
323
+ // Validate numeric parameters
324
+ if (this.answers.benchmarkConcurrency !== undefined) {
325
+ if (!Number.isInteger(this.answers.benchmarkConcurrency) || this.answers.benchmarkConcurrency < 1) {
326
+ throw new Error('⚠️ benchmarkConcurrency must be an integer >= 1');
327
+ }
328
+ }
329
+ if (this.answers.benchmarkInputTokensMean !== undefined) {
330
+ if (!Number.isInteger(this.answers.benchmarkInputTokensMean) || this.answers.benchmarkInputTokensMean < 1) {
331
+ throw new Error('⚠️ benchmarkInputTokensMean must be an integer >= 1');
332
+ }
333
+ }
334
+ if (this.answers.benchmarkOutputTokensMean !== undefined) {
335
+ if (!Number.isInteger(this.answers.benchmarkOutputTokensMean) || this.answers.benchmarkOutputTokensMean < 1) {
336
+ throw new Error('⚠️ benchmarkOutputTokensMean must be an integer >= 1');
337
+ }
338
+ }
339
+
340
+ // Validate S3 path format
341
+ if (this.answers.benchmarkS3OutputPath && this.answers.benchmarkS3OutputPath.trim() !== '') {
342
+ if (!this.answers.benchmarkS3OutputPath.startsWith('s3://')) {
343
+ throw new Error('⚠️ benchmarkS3OutputPath must start with "s3://". Example: s3://my-bucket/benchmark-results/');
344
+ }
345
+ }
346
+ }
347
+
300
348
  /**
301
349
  * Validates GPU instance type requirement for GPU-requiring backends.
302
350
  * Called when deploymentConfig is present.
@@ -21,7 +21,7 @@ import SchemaValidationEngine from './schema-validation-engine.js';
21
21
  import ServiceModelParser from './service-model-parser.js';
22
22
  import CrossCuttingChecker from './cross-cutting-checker.js';
23
23
  import HuggingFaceClient from './huggingface-client.js';
24
- import { getRegistryPath, loadManifest } from './schema-sync.js';
24
+ import { getRegistryPath, loadManifest, hasBenchmarkShape } from './schema-sync.js';
25
25
 
26
26
  const __filename = fileURLToPath(import.meta.url);
27
27
  const __dirname = path.dirname(__filename);
@@ -52,6 +52,115 @@ export function parseDoConfig(configPath) {
52
52
  return config;
53
53
  }
54
54
 
55
+ /**
56
+ * Validate benchmark parameters against service model constraints.
57
+ * Called when the CreateAIBenchmarkJob shape is available in the synced schema.
58
+ *
59
+ * Validates:
60
+ * - Concurrency: integer, min 1
61
+ * - S3OutputLocation: string, starts with s3://
62
+ * - AIBenchmarkJobName: pattern ^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars
63
+ *
64
+ * Requirements: 8.1, 8.2, 8.3
65
+ *
66
+ * @param {Object} config - Parsed do/config values
67
+ * @returns {Array<Object>} Array of validation findings
68
+ */
69
+ export function validateBenchmarkParams(config) {
70
+ const findings = [];
71
+
72
+ // Validate Concurrency (integer, min 1)
73
+ if (config.BENCHMARK_CONCURRENCY !== null && config.BENCHMARK_CONCURRENCY !== undefined && config.BENCHMARK_CONCURRENCY !== '') {
74
+ const concurrency = Number(config.BENCHMARK_CONCURRENCY);
75
+ if (!Number.isInteger(concurrency) || concurrency < 1) {
76
+ findings.push({
77
+ severity: 'error',
78
+ operation: 'CreateAIBenchmarkJob',
79
+ fieldPath: 'Concurrency',
80
+ constraint: 'integer >= 1',
81
+ invalidValue: config.BENCHMARK_CONCURRENCY,
82
+ remediationHint: 'BENCHMARK_CONCURRENCY must be a positive integer (>= 1)'
83
+ });
84
+ }
85
+ }
86
+
87
+ // Validate S3OutputLocation (string, starts with s3://)
88
+ if (config.BENCHMARK_S3_OUTPUT_PATH !== null && config.BENCHMARK_S3_OUTPUT_PATH !== undefined && config.BENCHMARK_S3_OUTPUT_PATH !== '') {
89
+ const s3Path = config.BENCHMARK_S3_OUTPUT_PATH;
90
+ // Skip dynamic shell expressions (e.g., s3://...$(aws ...))
91
+ if (!s3Path.includes('$(') && !s3Path.startsWith('s3://')) {
92
+ findings.push({
93
+ severity: 'error',
94
+ operation: 'CreateAIBenchmarkJob',
95
+ fieldPath: 'OutputConfig.S3OutputLocation',
96
+ constraint: 'must start with s3://',
97
+ invalidValue: s3Path,
98
+ remediationHint: 'BENCHMARK_S3_OUTPUT_PATH must start with "s3://". Example: s3://my-bucket/benchmark-results/'
99
+ });
100
+ }
101
+ }
102
+
103
+ // Validate AIBenchmarkJobName pattern (^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars)
104
+ if (config.BENCHMARK_JOB_NAME !== null && config.BENCHMARK_JOB_NAME !== undefined && config.BENCHMARK_JOB_NAME !== '') {
105
+ const jobName = config.BENCHMARK_JOB_NAME;
106
+ // Skip dynamic shell expressions
107
+ if (!jobName.includes('$(') && !jobName.includes('${')) {
108
+ const namePattern = /^[a-zA-Z0-9](-*[a-zA-Z0-9])*$/;
109
+ if (jobName.length > 63) {
110
+ findings.push({
111
+ severity: 'error',
112
+ operation: 'CreateAIBenchmarkJob',
113
+ fieldPath: 'AIBenchmarkJobName',
114
+ constraint: 'max 63 characters',
115
+ invalidValue: jobName,
116
+ remediationHint: 'AIBenchmarkJobName must be at most 63 characters'
117
+ });
118
+ } else if (!namePattern.test(jobName)) {
119
+ findings.push({
120
+ severity: 'error',
121
+ operation: 'CreateAIBenchmarkJob',
122
+ fieldPath: 'AIBenchmarkJobName',
123
+ constraint: 'pattern: ^[a-zA-Z0-9](-*[a-zA-Z0-9])*',
124
+ invalidValue: jobName,
125
+ remediationHint: 'AIBenchmarkJobName must start with alphanumeric and contain only alphanumeric characters and hyphens'
126
+ });
127
+ }
128
+ }
129
+ }
130
+
131
+ // Validate input tokens mean (integer, min 1)
132
+ if (config.BENCHMARK_INPUT_TOKENS_MEAN !== null && config.BENCHMARK_INPUT_TOKENS_MEAN !== undefined && config.BENCHMARK_INPUT_TOKENS_MEAN !== '') {
133
+ const inputTokens = Number(config.BENCHMARK_INPUT_TOKENS_MEAN);
134
+ if (!Number.isInteger(inputTokens) || inputTokens < 1) {
135
+ findings.push({
136
+ severity: 'error',
137
+ operation: 'CreateAIWorkloadConfig',
138
+ fieldPath: 'WorkloadSpec.parameters.prompt_input_tokens_mean',
139
+ constraint: 'integer >= 1',
140
+ invalidValue: config.BENCHMARK_INPUT_TOKENS_MEAN,
141
+ remediationHint: 'BENCHMARK_INPUT_TOKENS_MEAN must be a positive integer (>= 1)'
142
+ });
143
+ }
144
+ }
145
+
146
+ // Validate output tokens mean (integer, min 1)
147
+ if (config.BENCHMARK_OUTPUT_TOKENS_MEAN !== null && config.BENCHMARK_OUTPUT_TOKENS_MEAN !== undefined && config.BENCHMARK_OUTPUT_TOKENS_MEAN !== '') {
148
+ const outputTokens = Number(config.BENCHMARK_OUTPUT_TOKENS_MEAN);
149
+ if (!Number.isInteger(outputTokens) || outputTokens < 1) {
150
+ findings.push({
151
+ severity: 'error',
152
+ operation: 'CreateAIWorkloadConfig',
153
+ fieldPath: 'WorkloadSpec.parameters.output_tokens_mean',
154
+ constraint: 'integer >= 1',
155
+ invalidValue: config.BENCHMARK_OUTPUT_TOKENS_MEAN,
156
+ remediationHint: 'BENCHMARK_OUTPUT_TOKENS_MEAN must be a positive integer (>= 1)'
157
+ });
158
+ }
159
+ }
160
+
161
+ return findings;
162
+ }
163
+
55
164
  /**
56
165
  * Run the full validation pipeline.
57
166
  *
@@ -171,6 +280,20 @@ export async function run(options = {}) {
171
280
  }
172
281
  }
173
282
 
283
+ // Run benchmark parameter validation (Requirements 8.1, 8.2, 8.3)
284
+ if (config.BENCHMARK_CONCURRENCY || config.BENCHMARK_INPUT_TOKENS_MEAN ||
285
+ config.BENCHMARK_OUTPUT_TOKENS_MEAN || config.BENCHMARK_S3_OUTPUT_PATH) {
286
+ const benchmarkCheck = hasBenchmarkShape(registryPath);
287
+ if (benchmarkCheck.available) {
288
+ const benchmarkFindings = validateBenchmarkParams(config);
289
+ for (const finding of benchmarkFindings) {
290
+ report.addFinding(finding);
291
+ }
292
+ } else {
293
+ console.log('⚠️ Benchmark validation skipped: service model does not include AI Benchmark operations. Run `bootstrap sync-schemas` to update.');
294
+ }
295
+ }
296
+
174
297
  const summary = report.getSummary();
175
298
 
176
299
  // Load manifest for version info
@@ -213,4 +336,4 @@ export async function run(options = {}) {
213
336
  return exitCode;
214
337
  }
215
338
 
216
- export default { run, parseDoConfig };
339
+ export default { run, parseDoConfig, validateBenchmarkParams };