@aws/ml-container-creator 0.10.3 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/config/parameter-schema-v2.json +28 -1
  2. package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
  3. package/package.json +14 -5
  4. package/servers/instance-sizer/index.js +30 -17
  5. package/servers/instance-sizer/lib/instance-ranker.js +44 -0
  6. package/servers/lib/catalogs/instances.json +27 -0
  7. package/src/app.js +22 -1
  8. package/src/lib/bootstrap-command-handler.js +32 -3
  9. package/src/lib/config-validator.js +1 -1
  10. package/src/lib/generated/cli-options.js +7 -2
  11. package/src/lib/generated/parameter-matrix.js +16 -5
  12. package/src/lib/generated/validation-rules.js +7 -3
  13. package/src/lib/path-prover-brain.js +58 -1
  14. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  15. package/src/lib/prompts/model-prompts.js +6 -0
  16. package/src/lib/prove-pipeline-executor.js +294 -0
  17. package/src/lib/secrets-prompt-runner.js +4 -0
  18. package/src/lib/template-manager.js +1 -1
  19. package/src/lib/template-variable-resolver.js +62 -0
  20. package/templates/do/README.md +37 -0
  21. package/templates/do/adapter +8 -0
  22. package/templates/do/build +8 -0
  23. package/templates/do/clean.d/async-inference.ejs +8 -0
  24. package/templates/do/clean.d/batch-transform.ejs +8 -0
  25. package/templates/do/clean.d/hyperpod-eks.ejs +8 -0
  26. package/templates/do/clean.d/managed-inference.ejs +8 -0
  27. package/templates/do/config +12 -45
  28. package/templates/do/deploy.d/async-inference.ejs +33 -3
  29. package/templates/do/deploy.d/batch-transform.ejs +32 -3
  30. package/templates/do/deploy.d/hyperpod-eks.ejs +7 -0
  31. package/templates/do/deploy.d/managed-inference.ejs +27 -3
  32. package/templates/do/lib/endpoint-config.sh +1 -1
  33. package/templates/do/lib/profile.sh +44 -0
  34. package/templates/do/lib/staged-assets.sh +217 -0
  35. package/templates/do/push +8 -0
  36. package/templates/do/register +8 -0
  37. package/templates/do/stage +569 -0
  38. package/templates/do/submit +10 -0
  39. package/templates/do/test +1 -0
  40. package/templates/do/tune +7 -0
@@ -174,7 +174,7 @@
174
174
  "configKey": "instanceType",
175
175
  "default": null,
176
176
  "validation": {
177
- "pattern": "^ml\\.[a-z0-9]+\\.[a-z0-9]+$"
177
+ "pattern": "^ml\\.[a-z0-9-]+\\.[a-z0-9]+$"
178
178
  },
179
179
  "phase": "infrastructure",
180
180
  "group": "infrastructure",
@@ -1238,6 +1238,33 @@
1238
1238
  "deprecated": false,
1239
1239
  "since": "0.4.0"
1240
1240
  },
1241
+ "capacityReservationArn": {
1242
+ "type": "string",
1243
+ "description": "Capacity reservation ARN (FTP or ODCR) for reserved instance deployment",
1244
+ "cliFlag": "--capacity-reservation-arn",
1245
+ "cliArgName": "arn",
1246
+ "envVar": "ML_CAPACITY_RESERVATION_ARN",
1247
+ "templateVar": "capacityReservationArn",
1248
+ "configKey": "capacityReservationArn",
1249
+ "default": null,
1250
+ "validation": {
1251
+ "pattern": "^arn:aws:sagemaker:"
1252
+ },
1253
+ "phase": "infrastructure",
1254
+ "group": "endpoint",
1255
+ "appliesTo": {
1256
+ "deploymentTargets": [
1257
+ "managed-inference"
1258
+ ],
1259
+ "architectures": [
1260
+ "*"
1261
+ ]
1262
+ },
1263
+ "widget": null,
1264
+ "prompt": null,
1265
+ "deprecated": false,
1266
+ "since": "0.11.0"
1267
+ },
1241
1268
  "icCpuCount": {
1242
1269
  "type": "number",
1243
1270
  "description": "vCPUs allocated to the inference component",
@@ -983,43 +983,57 @@ export class MlccCiHarnessStack extends cdk.Stack {
983
983
  },
984
984
  storageDescriptor: {
985
985
  columns: [
986
- // Core dimensions
987
- { name: 'config_id', type: 'string', comment: 'SHA-256 hash (16 chars), join key with DynamoDB' },
988
- { name: 'model_name', type: 'string', comment: 'HuggingFace model ID (e.g., Qwen/Qwen3-4B)' },
989
- { name: 'model_family', type: 'string', comment: 'Derived: qwen3, llama3, deepseek-r1, etc.' },
990
- { name: 'instance_type', type: 'string', comment: 'SageMaker instance (e.g., ml.g5.xlarge)' },
991
- { name: 'instance_family', type: 'string', comment: 'Derived: g5, g6, g6e, p5, trn2, etc.' },
992
- { name: 'deployment_config', type: 'string', comment: 'Architecture-backend (e.g., transformers-vllm)' },
993
- { name: 'deployment_target', type: 'string', comment: 'realtime-inference, async-inference, etc.' },
994
- { name: 'run_timestamp', type: 'string', comment: 'When this benchmark ran (ISO 8601 UTC)' },
995
- // Configuration dimensions
996
- { name: 'tensor_parallel_degree', type: 'int', comment: 'TP degree (1, 2, 4, 8)' },
997
- { name: 'quantization', type: 'string', comment: 'Quantization method (fp16, fp8, awq, gptq, none)' },
998
- { name: 'enable_lora', type: 'boolean', comment: 'Whether LoRA adapters were enabled' },
999
- { name: 'base_image', type: 'string', comment: 'Container base image (e.g., vllm/vllm-openai:v0.8.5)' },
1000
- { name: 'base_image_version', type: 'string', comment: 'Extracted tag from base image' },
1001
- { name: 'mcc_version', type: 'string', comment: 'MCC generator version that produced the project' },
1002
- // Workload dimensions
1003
- { name: 'concurrency', type: 'int', comment: 'Number of concurrent requests in this measurement' },
1004
- { name: 'input_tokens_mean', type: 'int', comment: 'Mean input token count for workload' },
1005
- { name: 'output_tokens_mean', type: 'int', comment: 'Mean output token count for workload' },
1006
- { name: 'duration_seconds', type: 'int', comment: 'Benchmark duration in seconds' },
1007
- // Result metrics
1008
- { name: 'ttft_p50_ms', type: 'double', comment: 'Time to first token, 50th percentile (ms)' },
1009
- { name: 'ttft_p99_ms', type: 'double', comment: 'Time to first token, 99th percentile (ms)' },
1010
- { name: 'itl_p50_ms', type: 'double', comment: 'Inter-token latency, 50th percentile (ms)' },
1011
- { name: 'itl_p99_ms', type: 'double', comment: 'Inter-token latency, 99th percentile (ms)' },
1012
- { name: 'throughput_rps', type: 'double', comment: 'Requests per second at this concurrency' },
1013
- { name: 'tokens_per_second', type: 'double', comment: 'Output tokens per second' },
1014
- { name: 'cost_per_1m_tokens', type: 'double', comment: 'Estimated cost per 1M output tokens (USD)' },
1015
- { name: 'error_rate', type: 'double', comment: 'Fraction of requests that failed (0.0-1.0)' },
1016
- { name: 'status', type: 'string', comment: 'completed, failed, timeout, unfeasible' },
986
+ // Identity & config (matches Parquet writer get_parquet_schema() exactly)
987
+ { name: 'project_name', type: 'string', comment: 'MCC project name' },
988
+ { name: 'model_name', type: 'string', comment: 'HuggingFace model ID' },
989
+ { name: 'model_family', type: 'string', comment: 'Derived: qwen3, llama3, deepseek-r1' },
990
+ { name: 'instance_type', type: 'string', comment: 'SageMaker instance type' },
991
+ { name: 'deployment_config', type: 'string', comment: 'Architecture-backend' },
992
+ { name: 'deployment_target', type: 'string', comment: 'Deployment target' },
993
+ { name: 'quantization', type: 'string', comment: 'none, fp8, awq, gptq' },
994
+ { name: 'tensor_parallel_degree', type: 'int', comment: 'TP degree' },
995
+ { name: 'serving_config', type: 'string', comment: 'Full serving config JSON blob' },
996
+ { name: 'workload', type: 'string', comment: 'Named workload profile' },
997
+ { name: 'concurrency', type: 'int', comment: 'Concurrent requests' },
998
+ { name: 'input_tokens_mean', type: 'int', comment: 'Mean input tokens' },
999
+ { name: 'output_tokens_mean', type: 'int', comment: 'Mean output tokens' },
1000
+ { name: 'streaming', type: 'boolean', comment: 'Streaming enabled' },
1001
+ { name: 'duration_seconds', type: 'int', comment: 'Duration in seconds' },
1002
+ // Throughput metrics
1003
+ { name: 'request_throughput_rps', type: 'double', comment: 'Requests/sec' },
1004
+ { name: 'total_token_throughput_tps', type: 'double', comment: 'Total tokens/sec (in+out)' },
1005
+ { name: 'output_token_throughput_tps', type: 'double', comment: 'Output tokens/sec' },
1006
+ { name: 'request_count', type: 'double', comment: 'Total requests in run' },
1007
+ // Latency metrics (avg/p50/p90/p99)
1008
+ { name: 'ttft_avg_ms', type: 'double', comment: 'TTFT average (ms)' },
1009
+ { name: 'ttft_p50_ms', type: 'double', comment: 'TTFT p50 (ms)' },
1010
+ { name: 'ttft_p90_ms', type: 'double', comment: 'TTFT p90 (ms)' },
1011
+ { name: 'ttft_p99_ms', type: 'double', comment: 'TTFT p99 (ms)' },
1012
+ { name: 'itl_avg_ms', type: 'double', comment: 'ITL average (ms)' },
1013
+ { name: 'itl_p50_ms', type: 'double', comment: 'ITL p50 (ms)' },
1014
+ { name: 'itl_p90_ms', type: 'double', comment: 'ITL p90 (ms)' },
1015
+ { name: 'itl_p99_ms', type: 'double', comment: 'ITL p99 (ms)' },
1016
+ { name: 'e2e_latency_avg_ms', type: 'double', comment: 'E2E latency average (ms)' },
1017
+ { name: 'e2e_latency_p50_ms', type: 'double', comment: 'E2E latency p50 (ms)' },
1018
+ { name: 'e2e_latency_p90_ms', type: 'double', comment: 'E2E latency p90 (ms)' },
1019
+ { name: 'e2e_latency_p99_ms', type: 'double', comment: 'E2E latency p99 (ms)' },
1020
+ { name: 'prefill_tps_avg', type: 'double', comment: 'Prefill throughput avg (tokens/sec)' },
1021
+ { name: 'prefill_tps_p50', type: 'double', comment: 'Prefill throughput p50' },
1022
+ { name: 'output_token_tps_avg', type: 'double', comment: 'Per-user output TPS avg' },
1023
+ { name: 'output_token_tps_p50', type: 'double', comment: 'Per-user output TPS p50' },
1024
+ { name: 'output_token_tps_p90', type: 'double', comment: 'Per-user output TPS p90' },
1025
+ { name: 'ttst_p50_ms', type: 'double', comment: 'Time to second token p50 (ms)' },
1026
+ { name: 'ttst_p90_ms', type: 'double', comment: 'Time to second token p90 (ms)' },
1027
+ { name: 'output_sequence_length_avg', type: 'double', comment: 'Avg output sequence length' },
1028
+ { name: 'input_sequence_length_avg', type: 'double', comment: 'Avg input sequence length' },
1029
+ { name: 'error_rate', type: 'double', comment: 'Error rate (0.0-1.0)' },
1030
+ { name: 'benchmark_duration_sec', type: 'double', comment: 'Wall-clock duration (sec)' },
1017
1031
  // Provenance
1018
- { name: 'run_type', type: 'string', comment: 'Source: ci, path_prove, optimization, manual' },
1019
- { name: 'ci_run_id', type: 'string', comment: 'Step Functions execution ID or CodeBuild build ID' },
1020
- { name: 'ci_stage', type: 'string', comment: 'stage2-benchmark' },
1021
- { name: 'benchmark_job_name', type: 'string', comment: 'SageMaker AI Benchmark job name' },
1022
- { name: 'account_id', type: 'string', comment: 'AWS account ID' },
1032
+ { name: 'run_type', type: 'string', comment: 'ci, path_prove, manual' },
1033
+ { name: 'benchmark_job_name', type: 'string', comment: 'SageMaker benchmark job name' },
1034
+ { name: 'mcc_version', type: 'string', comment: 'MCC version' },
1035
+ { name: 'run_timestamp', type: 'string', comment: 'ISO 8601 UTC timestamp' },
1036
+ { name: 'region', type: 'string', comment: 'AWS region' },
1023
1037
  ],
1024
1038
  location: `s3://mlcc-benchmark-results-${this.account}-${this.region}/results/`,
1025
1039
  inputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aws/ml-container-creator",
3
- "version": "0.10.3",
3
+ "version": "0.13.3",
4
4
  "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -85,8 +85,8 @@
85
85
  "npm": ">=11.6.2"
86
86
  },
87
87
  "scripts": {
88
- "test": "mocha 'test/**/*.test.js' --recursive --timeout 30000",
89
- "test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000",
88
+ "test": "mocha 'test/**/*.test.js' --ignore 'test/property/**' --recursive --timeout 30000 --parallel",
89
+ "test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel",
90
90
  "test:all": "npm run test && npm run test:property",
91
91
  "test:fast": "mocha 'test/**/*.test.js' --recursive --timeout 15000 --parallel",
92
92
  "test:unit": "mocha 'test/unit/**/*.test.js' --recursive --timeout 15000",
@@ -98,10 +98,11 @@
98
98
  "test:perf": "node scripts/analyze-test-performance.js",
99
99
  "lint": "eslint src/ servers/ bin/ --ext .js,.cjs,.mjs",
100
100
  "lint:fix": "eslint src/ servers/ bin/ --ext .js,.cjs,.mjs --fix",
101
- "codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parameter-matrix.js",
101
+ "codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parameter-matrix.js && eslint src/lib/generated/ --fix",
102
102
  "validate:doc-commands": "node scripts/validate-docs-commands.js",
103
103
  "sbom": "sbom --format spdx --output sbom.json",
104
- "prepublishOnly": "npm run lint && npm run test:all"
104
+ "prepublishOnly": "npm run lint && npm run test:all",
105
+ "prepare": "husky || true"
105
106
  },
106
107
  "dependencies": {
107
108
  "@inquirer/prompts": "^8.4.2",
@@ -119,11 +120,19 @@
119
120
  "@aws-sdk/client-service-quotas": "^3.700.0",
120
121
  "@microsoft/eslint-formatter-sarif": "^3.1.0",
121
122
  "eslint": "^8.57.0",
123
+ "eslint-plugin-property-test-rules": "file:eslint-rules",
122
124
  "fast-check": "^4.5.2",
125
+ "husky": "^9.1.7",
123
126
  "license-report": "^6.8.0",
127
+ "lint-staged": "^17.0.7",
124
128
  "mocha": "^10.2.0",
125
129
  "npm-force-resolutions": "^0.0.10",
126
130
  "nyc": "^15.1.0",
127
131
  "sbom": "^0.0.0"
132
+ },
133
+ "lint-staged": {
134
+ "*.js": [
135
+ "eslint --fix --quiet --max-warnings 0"
136
+ ]
128
137
  }
129
138
  }
@@ -327,31 +327,44 @@ async function handleGetInstanceRecommendation(params) {
327
327
  // If model metadata cannot be resolved, return all GPU instances unfiltered
328
328
  if (!modelMetadata) {
329
329
  log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`);
330
- const allGpuInstances = Object.keys(effectiveCatalog)
330
+ let unfilteredRecs = Object.keys(effectiveCatalog)
331
331
  .filter(key => effectiveCatalog[key].category === 'gpu')
332
- .slice(0, limit);
332
+ .slice(0, limit)
333
+ .map(instanceType => ({
334
+ instanceType,
335
+ gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
336
+ totalVramGb: null,
337
+ utilizationPercent: null,
338
+ tensorParallelism: null,
339
+ costTier: null
340
+ }));
341
+
342
+ // Still apply availability ranking so quota/FTP info is displayed
343
+ if (DISCOVER_MODE && unfilteredRecs.length > 0) {
344
+ try {
345
+ const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION;
346
+ const quotaResolver = new QuotaResolver(region);
347
+ const instanceTypes = unfilteredRecs.map(r => r.instanceType);
348
+ const [quotas, reservations, ftps] = await Promise.allSettled([
349
+ quotaResolver.getQuotaHeadroom(instanceTypes),
350
+ quotaResolver.getCapacityReservations(),
351
+ quotaResolver.getTrainingPlans()
352
+ ]);
353
+ unfilteredRecs = applyAvailabilityRanking(unfilteredRecs, quotas.status === 'fulfilled' ? quotas.value : null, reservations.status === 'fulfilled' ? reservations.value : null, ftps.status === 'fulfilled' ? ftps.value : null);
354
+ } catch (err) {
355
+ log(`Quota resolution skipped (unfiltered path): ${err.message}`);
356
+ }
357
+ }
333
358
 
334
359
  return {
335
360
  content: [{
336
361
  type: 'text',
337
362
  text: JSON.stringify({
338
- values: { instanceType: allGpuInstances[0] || null },
339
- choices: { instanceType: allGpuInstances },
363
+ values: { instanceType: unfilteredRecs[0]?.instanceType || null },
364
+ choices: { instanceType: unfilteredRecs.map(r => r.instanceType) },
340
365
  metadata: {
341
366
  modelName,
342
- parameterCount: null,
343
- dtype: null,
344
- quantization: quantization || null,
345
- estimatedVramGb: null,
346
- vramBreakdown: null,
347
- recommendations: allGpuInstances.map(instanceType => ({
348
- instanceType,
349
- gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
350
- totalVramGb: null,
351
- utilizationPercent: null,
352
- tensorParallelism: null,
353
- costTier: null
354
- })),
367
+ recommendations: unfilteredRecs,
355
368
  source: 'unfiltered',
356
369
  cudaVersionFilter: cudaVersion || null,
357
370
  warning: `Could not resolve model metadata for "${modelName}". Returning all GPU instances without filtering.`
@@ -343,6 +343,50 @@ const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) =
343
343
  }
344
344
  }
345
345
 
346
+ // Inject FTP/reserved instances that aren't already in the recommendation list.
347
+ // These instances may not be in the static catalog (e.g., ml.p6-b200.48xlarge)
348
+ // but are available via capacity reservation — always surface them.
349
+ const existingTypes = new Set(recommendations.map(r => r.instanceType));
350
+
351
+ if (reservations) {
352
+ for (const [instanceType, info] of reservations) {
353
+ if (!existingTypes.has(instanceType)) {
354
+ recommendations.push({
355
+ instanceType,
356
+ capacityType: 'reserved',
357
+ reservationInfo: info,
358
+ reservationType: 'training-plan',
359
+ quotaStatus: 'available',
360
+ gpuCount: null,
361
+ totalVramGb: null,
362
+ utilizationPercent: null,
363
+ tensorParallelism: null,
364
+ costTier: null,
365
+ injectedFromReservation: true
366
+ });
367
+ }
368
+ }
369
+ }
370
+
371
+ if (ftps) {
372
+ for (const [instanceType, info] of ftps) {
373
+ if (!existingTypes.has(instanceType)) {
374
+ recommendations.push({
375
+ instanceType,
376
+ capacityType: 'ftp',
377
+ ftpInfo: info,
378
+ quotaStatus: 'available',
379
+ gpuCount: null,
380
+ totalVramGb: null,
381
+ utilizationPercent: null,
382
+ tensorParallelism: null,
383
+ costTier: null,
384
+ injectedFromFtp: true
385
+ });
386
+ }
387
+ }
388
+ }
389
+
346
390
  // Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
347
391
  const filtered = recommendations.filter(r =>
348
392
  r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
@@ -228,6 +228,33 @@
228
228
  "gpuMemoryGb": 24,
229
229
  "gpuType": "NVIDIA A10G",
230
230
  "costTier": "medium"
231
+ },
232
+ "ml.p6-b200.48xlarge": {
233
+ "category": "gpu",
234
+ "gpus": 8,
235
+ "vcpus": 192,
236
+ "memGb": 1536,
237
+ "accelerator": "8x B200 1440GB",
238
+ "cudaVersions": [
239
+ "12.4",
240
+ "12.6"
241
+ ],
242
+ "tags": [
243
+ "gpu",
244
+ "multi-gpu",
245
+ "b200",
246
+ "cuda-12",
247
+ "high-performance"
248
+ ],
249
+ "family": "p6",
250
+ "acceleratorType": "cuda",
251
+ "hardware": "NVIDIA B200",
252
+ "gpuArchitecture": "Blackwell",
253
+ "defaultCudaVersion": "12.6",
254
+ "notes": "8x NVIDIA B200 GPUs (1440GB total). Next-gen Blackwell architecture",
255
+ "gpuMemoryGb": 180,
256
+ "gpuType": "NVIDIA B200",
257
+ "costTier": "high"
231
258
  }
232
259
  },
233
260
  "recommendations": {
package/src/app.js CHANGED
@@ -377,6 +377,12 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
377
377
  ignorePatterns.push('**/do/lib/feedback.sh');
378
378
  }
379
379
 
380
+ // Exclude do/stage when model is already S3-sourced (nothing to stage)
381
+ const modelName = answers.modelName || answers.customModelName || '';
382
+ if (answers.modelSource === 's3' || modelName.startsWith('s3://')) {
383
+ ignorePatterns.push('**/do/stage');
384
+ }
385
+
380
386
  // Exclude do/test when hosted-model-endpoint is not selected
381
387
  const testTypes = answers.testTypes || [];
382
388
  if (!testTypes.includes('hosted-model-endpoint')) {
@@ -556,6 +562,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
556
562
  fs.writeFileSync(gitignorePath, gitignoreContent);
557
563
  }
558
564
  }
565
+
566
+ // Add .mlcc/ to .gitignore (staged-assets tracking — account-specific URIs)
567
+ {
568
+ const gitignorePath = path.join(destDir, '.gitignore');
569
+ const mlccIgnore = '# Staged assets tracking (account-specific, generated by do/stage)\n.mlcc/\n';
570
+ if (fs.existsSync(gitignorePath)) {
571
+ const existing = fs.readFileSync(gitignorePath, 'utf8');
572
+ if (!existing.includes('.mlcc/')) {
573
+ fs.appendFileSync(gitignorePath, `\n${mlccIgnore}`);
574
+ }
575
+ } else {
576
+ fs.writeFileSync(gitignorePath, mlccIgnore);
577
+ }
578
+ }
559
579
  }
560
580
 
561
581
  /**
@@ -818,7 +838,8 @@ function _setExecutablePermissions(destDir, answers = {}) {
818
838
  'do/add-ic',
819
839
  'do/adapter',
820
840
  'do/tune',
821
- 'do/train'
841
+ 'do/train',
842
+ 'do/stage'
822
843
  ];
823
844
 
824
845
  const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;
@@ -459,8 +459,22 @@ export default class BootstrapCommandHandler {
459
459
 
460
460
  // --no-rollback prevents rollback on AlreadyExists errors for IAM roles
461
461
  // that may pre-exist from a prior deployment or another region.
462
+ // Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
463
+ let importBucketCtx = '';
464
+ if (options.benchmarkInfra) {
465
+ try {
466
+ execSync(
467
+ `aws s3api head-bucket --bucket mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
468
+ { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
469
+ );
470
+ importBucketCtx = ' -c importExistingBenchmarkBucket=true';
471
+ console.log(' ℹ️ Benchmark results bucket already exists — importing into stack');
472
+ } catch {
473
+ // Bucket doesn't exist — will be created fresh
474
+ }
475
+ }
462
476
  const cdkDeployCmd = options.benchmarkInfra
463
- ? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
477
+ ? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${importBucketCtx}`
464
478
  : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
465
479
  execSync(
466
480
  cdkDeployCmd,
@@ -634,8 +648,22 @@ export default class BootstrapCommandHandler {
634
648
 
635
649
  // --no-rollback prevents rollback on AlreadyExists errors for IAM roles
636
650
  // that may pre-exist from a prior deployment or another region.
651
+ // Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
652
+ let updateImportBucketCtx = '';
653
+ if (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned) {
654
+ try {
655
+ execSync(
656
+ `aws s3api head-bucket --bucket mlcc-benchmark-results-${profileConfig.accountId}-${profileConfig.awsRegion}${profileConfig.awsProfile ? ` --profile ${profileConfig.awsProfile}` : ''} --region ${profileConfig.awsRegion}`,
657
+ { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
658
+ );
659
+ updateImportBucketCtx = ' -c importExistingBenchmarkBucket=true';
660
+ console.log(' ℹ️ Benchmark results bucket already exists — importing into stack');
661
+ } catch {
662
+ // Bucket doesn't exist — will be created fresh
663
+ }
664
+ }
637
665
  const updateCdkCmd = (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned)
638
- ? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
666
+ ? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${updateImportBucketCtx}`
639
667
  : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
640
668
  execSync(
641
669
  updateCdkCmd,
@@ -989,7 +1017,8 @@ export default class BootstrapCommandHandler {
989
1017
  * @returns {object} Parsed JSON output
990
1018
  */
991
1019
  _execAws(command, profile) {
992
- const fullCommand = `aws ${command} --profile ${profile} --output json`;
1020
+ const profileFlag = profile ? `--profile ${profile}` : '';
1021
+ const fullCommand = `aws ${command} ${profileFlag} --output json`.replace(/\s+/g, ' ').trim();
993
1022
  const output = execSync(fullCommand, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] });
994
1023
  const trimmed = output.trim();
995
1024
  if (!trimmed) {
@@ -361,7 +361,7 @@ export default class ConfigValidator {
361
361
 
362
362
  case 'instanceType':
363
363
  if (value) {
364
- const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
364
+ const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
365
365
  if (!instancePattern.test(value)) {
366
366
  throw new ValidationError(
367
367
  `Invalid instance type format: ${value}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)`,
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-06-10T13:42:40.974Z
3
+ // Generated: 2026-06-12T22:03:00.429Z
4
4
 
5
5
  /**
6
6
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -84,7 +84,7 @@ export const cliOptions = [
84
84
  },
85
85
  {
86
86
  'flag': '--include-benchmark',
87
- 'description': 'Include SageMaker AI Benchmarking',
87
+ 'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
88
88
  'defaultValue': false
89
89
  },
90
90
  {
@@ -244,6 +244,10 @@ export const cliOptions = [
244
244
  'flag': '--endpoint-volume-size <gb>',
245
245
  'description': 'ML storage volume size in GB'
246
246
  },
247
+ {
248
+ 'flag': '--capacity-reservation-arn <arn>',
249
+ 'description': 'Capacity reservation ARN (FTP or ODCR) for reserved instance deployment'
250
+ },
247
251
  {
248
252
  'flag': '--ic-cpu-count <n>',
249
253
  'description': 'vCPUs allocated to the inference component'
@@ -439,6 +443,7 @@ export const helpGroups = {
439
443
  '--endpoint-data-capture-percent': 'endpoint',
440
444
  '--endpoint-variant-name': 'endpoint',
441
445
  '--endpoint-volume-size': 'endpoint',
446
+ '--capacity-reservation-arn': 'endpoint',
442
447
  '--ic-cpu-count': 'ic',
443
448
  '--ic-model-weight': 'ic',
444
449
  '--async-s3-output-path': 'async',
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-06-10T13:42:41.083Z
3
+ // Generated: 2026-06-12T22:03:00.552Z
4
4
 
5
5
  /**
6
6
  * Parameter matrix defining how each parameter is loaded from various sources.
@@ -148,7 +148,7 @@ export const parameterMatrix = {
148
148
  'configFile': true,
149
149
  'packageJson': false,
150
150
  'mcp': false,
151
- 'promptable': true,
151
+ 'promptable': false,
152
152
  'required': false,
153
153
  'default': 10,
154
154
  'valueSpace': 'unbounded'
@@ -159,7 +159,7 @@ export const parameterMatrix = {
159
159
  'configFile': true,
160
160
  'packageJson': false,
161
161
  'mcp': false,
162
- 'promptable': true,
162
+ 'promptable': false,
163
163
  'required': false,
164
164
  'default': 550,
165
165
  'valueSpace': 'unbounded'
@@ -170,7 +170,7 @@ export const parameterMatrix = {
170
170
  'configFile': true,
171
171
  'packageJson': false,
172
172
  'mcp': false,
173
- 'promptable': true,
173
+ 'promptable': false,
174
174
  'required': false,
175
175
  'default': 150,
176
176
  'valueSpace': 'unbounded'
@@ -181,7 +181,7 @@ export const parameterMatrix = {
181
181
  'configFile': true,
182
182
  'packageJson': false,
183
183
  'mcp': false,
184
- 'promptable': true,
184
+ 'promptable': false,
185
185
  'required': false,
186
186
  'default': true,
187
187
  'valueSpace': 'bounded'
@@ -358,6 +358,17 @@ export const parameterMatrix = {
358
358
  'valueSpace': 'unbounded',
359
359
  'schemaValidated': true
360
360
  },
361
+ 'capacityReservationArn': {
362
+ 'cliOption': 'capacity-reservation-arn',
363
+ 'envVar': 'ML_CAPACITY_RESERVATION_ARN',
364
+ 'configFile': true,
365
+ 'packageJson': false,
366
+ 'mcp': false,
367
+ 'promptable': false,
368
+ 'required': false,
369
+ 'default': null,
370
+ 'valueSpace': 'unbounded'
371
+ },
361
372
  'icCpuCount': {
362
373
  'cliOption': 'ic-cpu-count',
363
374
  'envVar': 'ML_IC_CPU_COUNT',
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-06-10T13:42:41.011Z
3
+ // Generated: 2026-06-12T22:03:00.468Z
4
4
 
5
5
  /**
6
6
  * Validation rules derived from parameter-schema-v2.json.
@@ -26,7 +26,7 @@ export const validationRules = {
26
26
  return null;
27
27
  },
28
28
  'instanceType': (value) => {
29
- if (!new RegExp('^ml\\.[a-z0-9]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
29
+ if (!new RegExp('^ml\\.[a-z0-9-]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
30
30
  return null;
31
31
  },
32
32
  'icGpuCount': (value) => {
@@ -132,6 +132,10 @@ export const validationRules = {
132
132
  if (value > 16384) return `endpointVolumeSize must be <= 16384, got ${value}`;
133
133
  return null;
134
134
  },
135
+ 'capacityReservationArn': (value) => {
136
+ if (!new RegExp('^arn:aws:sagemaker:').test(value)) return 'capacityReservationArn does not match required pattern';
137
+ return null;
138
+ },
135
139
  'icCpuCount': (value) => {
136
140
  if (value < 0.25) return `icCpuCount must be >= 0.25, got ${value}`;
137
141
  if (value > 768) return `icCpuCount must be <= 768, got ${value}`;
@@ -199,4 +203,4 @@ export const validationRules = {
199
203
  }
200
204
  };
201
205
 
202
- // 43 parameters have validation rules
206
+ // 44 parameters have validation rules