npm - @aws/ml-container-creator - Versions diffs - 0.10.3 → 0.13.3 - Mend

@aws/ml-container-creator 0.10.3 → 0.13.3

This diff compares the contents of two publicly available package versions, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (40) hide show

package/config/parameter-schema-v2.json +28 -1
package/infra/ci-harness/lib/ci-harness-stack.ts +50 -36
package/package.json +14 -5
package/servers/instance-sizer/index.js +30 -17
package/servers/instance-sizer/lib/instance-ranker.js +44 -0
package/servers/lib/catalogs/instances.json +27 -0
package/src/app.js +22 -1
package/src/lib/bootstrap-command-handler.js +32 -3
package/src/lib/config-validator.js +1 -1
package/src/lib/generated/cli-options.js +7 -2
package/src/lib/generated/parameter-matrix.js +16 -5
package/src/lib/generated/validation-rules.js +7 -3
package/src/lib/path-prover-brain.js +58 -1
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prove-pipeline-executor.js +294 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +62 -0
package/templates/do/README.md +37 -0
package/templates/do/adapter +8 -0
package/templates/do/build +8 -0
package/templates/do/clean.d/async-inference.ejs +8 -0
package/templates/do/clean.d/batch-transform.ejs +8 -0
package/templates/do/clean.d/hyperpod-eks.ejs +8 -0
package/templates/do/clean.d/managed-inference.ejs +8 -0
package/templates/do/config +12 -45
package/templates/do/deploy.d/async-inference.ejs +33 -3
package/templates/do/deploy.d/batch-transform.ejs +32 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +7 -0
package/templates/do/deploy.d/managed-inference.ejs +27 -3
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/lib/staged-assets.sh +217 -0
package/templates/do/push +8 -0
package/templates/do/register +8 -0
package/templates/do/stage +569 -0
package/templates/do/submit +10 -0
package/templates/do/test +1 -0
package/templates/do/tune +7 -0

package/config/parameter-schema-v2.json CHANGED Viewed

@@ -174,7 +174,7 @@
             "configKey": "instanceType",
             "default": null,
             "validation": {
-                "pattern": "^ml\\.[a-z0-9]+\\.[a-z0-9]+$"
+                "pattern": "^ml\\.[a-z0-9-]+\\.[a-z0-9]+$"
             },
             "phase": "infrastructure",
             "group": "infrastructure",
@@ -1238,6 +1238,33 @@
             "deprecated": false,
             "since": "0.4.0"
         },
+        "capacityReservationArn": {
+            "type": "string",
+            "description": "Capacity reservation ARN (FTP or ODCR) for reserved instance deployment",
+            "cliFlag": "--capacity-reservation-arn",
+            "cliArgName": "arn",
+            "envVar": "ML_CAPACITY_RESERVATION_ARN",
+            "templateVar": "capacityReservationArn",
+            "configKey": "capacityReservationArn",
+            "default": null,
+            "validation": {
+                "pattern": "^arn:aws:sagemaker:"
+            },
+            "phase": "infrastructure",
+            "group": "endpoint",
+            "appliesTo": {
+                "deploymentTargets": [
+                    "managed-inference"
+                ],
+                "architectures": [
+                    "*"
+                ]
+            },
+            "widget": null,
+            "prompt": null,
+            "deprecated": false,
+            "since": "0.11.0"
+        },
         "icCpuCount": {
             "type": "number",
             "description": "vCPUs allocated to the inference component",

package/infra/ci-harness/lib/ci-harness-stack.ts CHANGED Viewed

@@ -983,43 +983,57 @@ export class MlccCiHarnessStack extends cdk.Stack {
                 },
                 storageDescriptor: {
                     columns: [
-                        // Core dimensions
-                        { name: 'config_id', type: 'string', comment: 'SHA-256 hash (16 chars), join key with DynamoDB' },
-                        { name: 'model_name', type: 'string', comment: 'HuggingFace model ID (e.g., Qwen/Qwen3-4B)' },
-                        { name: 'model_family', type: 'string', comment: 'Derived: qwen3, llama3, deepseek-r1, etc.' },
-                        { name: 'instance_type', type: 'string', comment: 'SageMaker instance (e.g., ml.g5.xlarge)' },
-                        { name: 'instance_family', type: 'string', comment: 'Derived: g5, g6, g6e, p5, trn2, etc.' },
-                        { name: 'deployment_config', type: 'string', comment: 'Architecture-backend (e.g., transformers-vllm)' },
-                        { name: 'deployment_target', type: 'string', comment: 'realtime-inference, async-inference, etc.' },
-                        { name: 'run_timestamp', type: 'string', comment: 'When this benchmark ran (ISO 8601 UTC)' },
-                        // Configuration dimensions
-                        { name: 'tensor_parallel_degree', type: 'int', comment: 'TP degree (1, 2, 4, 8)' },
-                        { name: 'quantization', type: 'string', comment: 'Quantization method (fp16, fp8, awq, gptq, none)' },
-                        { name: 'enable_lora', type: 'boolean', comment: 'Whether LoRA adapters were enabled' },
-                        { name: 'base_image', type: 'string', comment: 'Container base image (e.g., vllm/vllm-openai:v0.8.5)' },
-                        { name: 'base_image_version', type: 'string', comment: 'Extracted tag from base image' },
-                        { name: 'mcc_version', type: 'string', comment: 'MCC generator version that produced the project' },
-                        // Workload dimensions
-                        { name: 'concurrency', type: 'int', comment: 'Number of concurrent requests in this measurement' },
-                        { name: 'input_tokens_mean', type: 'int', comment: 'Mean input token count for workload' },
-                        { name: 'output_tokens_mean', type: 'int', comment: 'Mean output token count for workload' },
-                        { name: 'duration_seconds', type: 'int', comment: 'Benchmark duration in seconds' },
-                        // Result metrics
-                        { name: 'ttft_p50_ms', type: 'double', comment: 'Time to first token, 50th percentile (ms)' },
-                        { name: 'ttft_p99_ms', type: 'double', comment: 'Time to first token, 99th percentile (ms)' },
-                        { name: 'itl_p50_ms', type: 'double', comment: 'Inter-token latency, 50th percentile (ms)' },
-                        { name: 'itl_p99_ms', type: 'double', comment: 'Inter-token latency, 99th percentile (ms)' },
-                        { name: 'throughput_rps', type: 'double', comment: 'Requests per second at this concurrency' },
-                        { name: 'tokens_per_second', type: 'double', comment: 'Output tokens per second' },
-                        { name: 'cost_per_1m_tokens', type: 'double', comment: 'Estimated cost per 1M output tokens (USD)' },
-                        { name: 'error_rate', type: 'double', comment: 'Fraction of requests that failed (0.0-1.0)' },
-                        { name: 'status', type: 'string', comment: 'completed, failed, timeout, unfeasible' },
+                        // Identity & config (matches Parquet writer get_parquet_schema() exactly)
+                        { name: 'project_name', type: 'string', comment: 'MCC project name' },
+                        { name: 'model_name', type: 'string', comment: 'HuggingFace model ID' },
+                        { name: 'model_family', type: 'string', comment: 'Derived: qwen3, llama3, deepseek-r1' },
+                        { name: 'instance_type', type: 'string', comment: 'SageMaker instance type' },
+                        { name: 'deployment_config', type: 'string', comment: 'Architecture-backend' },
+                        { name: 'deployment_target', type: 'string', comment: 'Deployment target' },
+                        { name: 'quantization', type: 'string', comment: 'none, fp8, awq, gptq' },
+                        { name: 'tensor_parallel_degree', type: 'int', comment: 'TP degree' },
+                        { name: 'serving_config', type: 'string', comment: 'Full serving config JSON blob' },
+                        { name: 'workload', type: 'string', comment: 'Named workload profile' },
+                        { name: 'concurrency', type: 'int', comment: 'Concurrent requests' },
+                        { name: 'input_tokens_mean', type: 'int', comment: 'Mean input tokens' },
+                        { name: 'output_tokens_mean', type: 'int', comment: 'Mean output tokens' },
+                        { name: 'streaming', type: 'boolean', comment: 'Streaming enabled' },
+                        { name: 'duration_seconds', type: 'int', comment: 'Duration in seconds' },
+                        // Throughput metrics
+                        { name: 'request_throughput_rps', type: 'double', comment: 'Requests/sec' },
+                        { name: 'total_token_throughput_tps', type: 'double', comment: 'Total tokens/sec (in+out)' },
+                        { name: 'output_token_throughput_tps', type: 'double', comment: 'Output tokens/sec' },
+                        { name: 'request_count', type: 'double', comment: 'Total requests in run' },
+                        // Latency metrics (avg/p50/p90/p99)
+                        { name: 'ttft_avg_ms', type: 'double', comment: 'TTFT average (ms)' },
+                        { name: 'ttft_p50_ms', type: 'double', comment: 'TTFT p50 (ms)' },
+                        { name: 'ttft_p90_ms', type: 'double', comment: 'TTFT p90 (ms)' },
+                        { name: 'ttft_p99_ms', type: 'double', comment: 'TTFT p99 (ms)' },
+                        { name: 'itl_avg_ms', type: 'double', comment: 'ITL average (ms)' },
+                        { name: 'itl_p50_ms', type: 'double', comment: 'ITL p50 (ms)' },
+                        { name: 'itl_p90_ms', type: 'double', comment: 'ITL p90 (ms)' },
+                        { name: 'itl_p99_ms', type: 'double', comment: 'ITL p99 (ms)' },
+                        { name: 'e2e_latency_avg_ms', type: 'double', comment: 'E2E latency average (ms)' },
+                        { name: 'e2e_latency_p50_ms', type: 'double', comment: 'E2E latency p50 (ms)' },
+                        { name: 'e2e_latency_p90_ms', type: 'double', comment: 'E2E latency p90 (ms)' },
+                        { name: 'e2e_latency_p99_ms', type: 'double', comment: 'E2E latency p99 (ms)' },
+                        { name: 'prefill_tps_avg', type: 'double', comment: 'Prefill throughput avg (tokens/sec)' },
+                        { name: 'prefill_tps_p50', type: 'double', comment: 'Prefill throughput p50' },
+                        { name: 'output_token_tps_avg', type: 'double', comment: 'Per-user output TPS avg' },
+                        { name: 'output_token_tps_p50', type: 'double', comment: 'Per-user output TPS p50' },
+                        { name: 'output_token_tps_p90', type: 'double', comment: 'Per-user output TPS p90' },
+                        { name: 'ttst_p50_ms', type: 'double', comment: 'Time to second token p50 (ms)' },
+                        { name: 'ttst_p90_ms', type: 'double', comment: 'Time to second token p90 (ms)' },
+                        { name: 'output_sequence_length_avg', type: 'double', comment: 'Avg output sequence length' },
+                        { name: 'input_sequence_length_avg', type: 'double', comment: 'Avg input sequence length' },
+                        { name: 'error_rate', type: 'double', comment: 'Error rate (0.0-1.0)' },
+                        { name: 'benchmark_duration_sec', type: 'double', comment: 'Wall-clock duration (sec)' },
                         // Provenance
-                        { name: 'run_type', type: 'string', comment: 'Source: ci, path_prove, optimization, manual' },
-                        { name: 'ci_run_id', type: 'string', comment: 'Step Functions execution ID or CodeBuild build ID' },
-                        { name: 'ci_stage', type: 'string', comment: 'stage2-benchmark' },
-                        { name: 'benchmark_job_name', type: 'string', comment: 'SageMaker AI Benchmark job name' },
-                        { name: 'account_id', type: 'string', comment: 'AWS account ID' },
+                        { name: 'run_type', type: 'string', comment: 'ci, path_prove, manual' },
+                        { name: 'benchmark_job_name', type: 'string', comment: 'SageMaker benchmark job name' },
+                        { name: 'mcc_version', type: 'string', comment: 'MCC version' },
+                        { name: 'run_timestamp', type: 'string', comment: 'ISO 8601 UTC timestamp' },
+                        { name: 'region', type: 'string', comment: 'AWS region' },
                     ],
                     location: `s3://mlcc-benchmark-results-${this.account}-${this.region}/results/`,
                     inputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat',

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "0.10.3",
+  "version": "0.13.3",
   "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
   "main": "src/index.js",
   "bin": {
@@ -85,8 +85,8 @@
     "npm": ">=11.6.2"
   },
   "scripts": {
-    "test": "mocha 'test/**/*.test.js' --recursive --timeout 30000",
-    "test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000",
+    "test": "mocha 'test/**/*.test.js' --ignore 'test/property/**' --recursive --timeout 30000 --parallel",
+    "test:property": "mocha 'test/property/**/*.test.js' --recursive --timeout 60000 --parallel",
     "test:all": "npm run test && npm run test:property",
     "test:fast": "mocha 'test/**/*.test.js' --recursive --timeout 15000 --parallel",
     "test:unit": "mocha 'test/unit/**/*.test.js' --recursive --timeout 15000",
@@ -98,10 +98,11 @@
     "test:perf": "node scripts/analyze-test-performance.js",
     "lint": "eslint src/ servers/ bin/ --ext .js,.cjs,.mjs",
     "lint:fix": "eslint src/ servers/ bin/ --ext .js,.cjs,.mjs --fix",
-    "codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parameter-matrix.js",
+    "codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parameter-matrix.js && eslint src/lib/generated/ --fix",
     "validate:doc-commands": "node scripts/validate-docs-commands.js",
     "sbom": "sbom --format spdx --output sbom.json",
-    "prepublishOnly": "npm run lint && npm run test:all"
+    "prepublishOnly": "npm run lint && npm run test:all",
+    "prepare": "husky || true"
   },
   "dependencies": {
     "@inquirer/prompts": "^8.4.2",
@@ -119,11 +120,19 @@
     "@aws-sdk/client-service-quotas": "^3.700.0",
     "@microsoft/eslint-formatter-sarif": "^3.1.0",
     "eslint": "^8.57.0",
+    "eslint-plugin-property-test-rules": "file:eslint-rules",
     "fast-check": "^4.5.2",
+    "husky": "^9.1.7",
     "license-report": "^6.8.0",
+    "lint-staged": "^17.0.7",
     "mocha": "^10.2.0",
     "npm-force-resolutions": "^0.0.10",
     "nyc": "^15.1.0",
     "sbom": "^0.0.0"
+  },
+  "lint-staged": {
+    "*.js": [
+      "eslint --fix --quiet --max-warnings 0"
+    ]
   }
 }

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -327,31 +327,44 @@ async function handleGetInstanceRecommendation(params) {
     // If model metadata cannot be resolved, return all GPU instances unfiltered
     if (!modelMetadata) {
         log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`);
-        const allGpuInstances = Object.keys(effectiveCatalog)
+        let unfilteredRecs = Object.keys(effectiveCatalog)
             .filter(key => effectiveCatalog[key].category === 'gpu')
-            .slice(0, limit);
+            .slice(0, limit)
+            .map(instanceType => ({
+                instanceType,
+                gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
+                totalVramGb: null,
+                utilizationPercent: null,
+                tensorParallelism: null,
+                costTier: null
+            }));
+        // Still apply availability ranking so quota/FTP info is displayed
+        if (DISCOVER_MODE && unfilteredRecs.length > 0) {
+            try {
+                const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION;
+                const quotaResolver = new QuotaResolver(region);
+                const instanceTypes = unfilteredRecs.map(r => r.instanceType);
+                const [quotas, reservations, ftps] = await Promise.allSettled([
+                    quotaResolver.getQuotaHeadroom(instanceTypes),
+                    quotaResolver.getCapacityReservations(),
+                    quotaResolver.getTrainingPlans()
+                ]);
+                unfilteredRecs = applyAvailabilityRanking(unfilteredRecs, quotas.status === 'fulfilled' ? quotas.value : null, reservations.status === 'fulfilled' ? reservations.value : null, ftps.status === 'fulfilled' ? ftps.value : null);
+            } catch (err) {
+                log(`Quota resolution skipped (unfiltered path): ${err.message}`);
+            }
+        }
         return {
             content: [{
                 type: 'text',
                 text: JSON.stringify({
-                    values: { instanceType: allGpuInstances[0] || null },
-                    choices: { instanceType: allGpuInstances },
+                    values: { instanceType: unfilteredRecs[0]?.instanceType || null },
+                    choices: { instanceType: unfilteredRecs.map(r => r.instanceType) },
                     metadata: {
                         modelName,
-                        parameterCount: null,
-                        dtype: null,
-                        quantization: quantization || null,
-                        estimatedVramGb: null,
-                        vramBreakdown: null,
-                        recommendations: allGpuInstances.map(instanceType => ({
-                            instanceType,
-                            gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
-                            totalVramGb: null,
-                            utilizationPercent: null,
-                            tensorParallelism: null,
-                            costTier: null
-                        })),
+                        recommendations: unfilteredRecs,
                         source: 'unfiltered',
                         cudaVersionFilter: cudaVersion || null,
                         warning: `Could not resolve model metadata for "${modelName}". Returning all GPU instances without filtering.`

package/servers/instance-sizer/lib/instance-ranker.js CHANGED Viewed

@@ -343,6 +343,50 @@ const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) =
         }
     }
+    // Inject FTP/reserved instances that aren't already in the recommendation list.
+    // These instances may not be in the static catalog (e.g., ml.p6-b200.48xlarge)
+    // but are available via capacity reservation — always surface them.
+    const existingTypes = new Set(recommendations.map(r => r.instanceType));
+    if (reservations) {
+        for (const [instanceType, info] of reservations) {
+            if (!existingTypes.has(instanceType)) {
+                recommendations.push({
+                    instanceType,
+                    capacityType: 'reserved',
+                    reservationInfo: info,
+                    reservationType: 'training-plan',
+                    quotaStatus: 'available',
+                    gpuCount: null,
+                    totalVramGb: null,
+                    utilizationPercent: null,
+                    tensorParallelism: null,
+                    costTier: null,
+                    injectedFromReservation: true
+                });
+            }
+        }
+    }
+    if (ftps) {
+        for (const [instanceType, info] of ftps) {
+            if (!existingTypes.has(instanceType)) {
+                recommendations.push({
+                    instanceType,
+                    capacityType: 'ftp',
+                    ftpInfo: info,
+                    quotaStatus: 'available',
+                    gpuCount: null,
+                    totalVramGb: null,
+                    utilizationPercent: null,
+                    tensorParallelism: null,
+                    costTier: null,
+                    injectedFromFtp: true
+                });
+            }
+        }
+    }
     // Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
     const filtered = recommendations.filter(r =>
         r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'

package/servers/lib/catalogs/instances.json CHANGED Viewed

@@ -228,6 +228,33 @@
             "gpuMemoryGb": 24,
             "gpuType": "NVIDIA A10G",
             "costTier": "medium"
+        },
+        "ml.p6-b200.48xlarge": {
+            "category": "gpu",
+            "gpus": 8,
+            "vcpus": 192,
+            "memGb": 1536,
+            "accelerator": "8x B200 1440GB",
+            "cudaVersions": [
+                "12.4",
+                "12.6"
+            ],
+            "tags": [
+                "gpu",
+                "multi-gpu",
+                "b200",
+                "cuda-12",
+                "high-performance"
+            ],
+            "family": "p6",
+            "acceleratorType": "cuda",
+            "hardware": "NVIDIA B200",
+            "gpuArchitecture": "Blackwell",
+            "defaultCudaVersion": "12.6",
+            "notes": "8x NVIDIA B200 GPUs (1440GB total). Next-gen Blackwell architecture",
+            "gpuMemoryGb": 180,
+            "gpuType": "NVIDIA B200",
+            "costTier": "high"
         }
     },
     "recommendations": {

package/src/app.js CHANGED Viewed

@@ -377,6 +377,12 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/do/lib/feedback.sh');
     }
+    // Exclude do/stage when model is already S3-sourced (nothing to stage)
+    const modelName = answers.modelName || answers.customModelName || '';
+    if (answers.modelSource === 's3' || modelName.startsWith('s3://')) {
+        ignorePatterns.push('**/do/stage');
+    }
     // Exclude do/test when hosted-model-endpoint is not selected
     const testTypes = answers.testTypes || [];
     if (!testTypes.includes('hosted-model-endpoint')) {
@@ -556,6 +562,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
             fs.writeFileSync(gitignorePath, gitignoreContent);
         }
     }
+    // Add .mlcc/ to .gitignore (staged-assets tracking — account-specific URIs)
+    {
+        const gitignorePath = path.join(destDir, '.gitignore');
+        const mlccIgnore = '# Staged assets tracking (account-specific, generated by do/stage)\n.mlcc/\n';
+        if (fs.existsSync(gitignorePath)) {
+            const existing = fs.readFileSync(gitignorePath, 'utf8');
+            if (!existing.includes('.mlcc/')) {
+                fs.appendFileSync(gitignorePath, `\n${mlccIgnore}`);
+            }
+        } else {
+            fs.writeFileSync(gitignorePath, mlccIgnore);
+        }
+    }
 }
 /**
@@ -818,7 +838,8 @@ function _setExecutablePermissions(destDir, answers = {}) {
         'do/add-ic',
         'do/adapter',
         'do/tune',
-        'do/train'
+        'do/train',
+        'do/stage'
     ];
     const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;

package/src/lib/bootstrap-command-handler.js CHANGED Viewed

@@ -459,8 +459,22 @@ export default class BootstrapCommandHandler {
                     // --no-rollback prevents rollback on AlreadyExists errors for IAM roles
                     // that may pre-exist from a prior deployment or another region.
+                    // Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
+                    let importBucketCtx = '';
+                    if (options.benchmarkInfra) {
+                        try {
+                            execSync(
+                                `aws s3api head-bucket --bucket mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
+                                { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
+                            );
+                            importBucketCtx = ' -c importExistingBenchmarkBucket=true';
+                            console.log('  ℹ️  Benchmark results bucket already exists — importing into stack');
+                        } catch {
+                            // Bucket doesn't exist — will be created fresh
+                        }
+                    }
                     const cdkDeployCmd = options.benchmarkInfra
-                        ? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
+                        ? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${importBucketCtx}`
                         : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
                     execSync(
                         cdkDeployCmd,
@@ -634,8 +648,22 @@ export default class BootstrapCommandHandler {
                     // --no-rollback prevents rollback on AlreadyExists errors for IAM roles
                     // that may pre-exist from a prior deployment or another region.
+                    // Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
+                    let updateImportBucketCtx = '';
+                    if (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned) {
+                        try {
+                            execSync(
+                                `aws s3api head-bucket --bucket mlcc-benchmark-results-${profileConfig.accountId}-${profileConfig.awsRegion}${profileConfig.awsProfile ? ` --profile ${profileConfig.awsProfile}` : ''} --region ${profileConfig.awsRegion}`,
+                                { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
+                            );
+                            updateImportBucketCtx = ' -c importExistingBenchmarkBucket=true';
+                            console.log('  ℹ️  Benchmark results bucket already exists — importing into stack');
+                        } catch {
+                            // Bucket doesn't exist — will be created fresh
+                        }
+                    }
                     const updateCdkCmd = (options.benchmarkInfra || profileConfig.benchmarkInfraProvisioned)
-                        ? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
+                        ? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${updateImportBucketCtx}`
                         : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
                     execSync(
                         updateCdkCmd,
@@ -989,7 +1017,8 @@ export default class BootstrapCommandHandler {
      * @returns {object} Parsed JSON output
      */
     _execAws(command, profile) {
-        const fullCommand = `aws ${command} --profile ${profile} --output json`;
+        const profileFlag = profile ? `--profile ${profile}` : '';
+        const fullCommand = `aws ${command} ${profileFlag} --output json`.replace(/\s+/g, ' ').trim();
         const output = execSync(fullCommand, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] });
         const trimmed = output.trim();
         if (!trimmed) {

package/src/lib/config-validator.js CHANGED Viewed

@@ -361,7 +361,7 @@ export default class ConfigValidator {
         case 'instanceType':
             if (value) {
-                const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
+                const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
                 if (!instancePattern.test(value)) {
                     throw new ValidationError(
                         `Invalid instance type format: ${value}. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)`,

package/src/lib/generated/cli-options.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-10T13:42:40.974Z
+// Generated: 2026-06-12T22:03:00.429Z
 /**
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -84,7 +84,7 @@ export const cliOptions = [
     },
     {
         'flag': '--include-benchmark',
-        'description': 'Include SageMaker AI Benchmarking',
+        'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
         'defaultValue': false
     },
     {
@@ -244,6 +244,10 @@ export const cliOptions = [
         'flag': '--endpoint-volume-size <gb>',
         'description': 'ML storage volume size in GB'
     },
+    {
+        'flag': '--capacity-reservation-arn <arn>',
+        'description': 'Capacity reservation ARN (FTP or ODCR) for reserved instance deployment'
+    },
     {
         'flag': '--ic-cpu-count <n>',
         'description': 'vCPUs allocated to the inference component'
@@ -439,6 +443,7 @@ export const helpGroups = {
     '--endpoint-data-capture-percent': 'endpoint',
     '--endpoint-variant-name': 'endpoint',
     '--endpoint-volume-size': 'endpoint',
+    '--capacity-reservation-arn': 'endpoint',
     '--ic-cpu-count': 'ic',
     '--ic-model-weight': 'ic',
     '--async-s3-output-path': 'async',

package/src/lib/generated/parameter-matrix.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-10T13:42:41.083Z
+// Generated: 2026-06-12T22:03:00.552Z
 /**
  * Parameter matrix defining how each parameter is loaded from various sources.
@@ -148,7 +148,7 @@ export const parameterMatrix = {
         'configFile': true,
         'packageJson': false,
         'mcp': false,
-        'promptable': true,
+        'promptable': false,
         'required': false,
         'default': 10,
         'valueSpace': 'unbounded'
@@ -159,7 +159,7 @@ export const parameterMatrix = {
         'configFile': true,
         'packageJson': false,
         'mcp': false,
-        'promptable': true,
+        'promptable': false,
         'required': false,
         'default': 550,
         'valueSpace': 'unbounded'
@@ -170,7 +170,7 @@ export const parameterMatrix = {
         'configFile': true,
         'packageJson': false,
         'mcp': false,
-        'promptable': true,
+        'promptable': false,
         'required': false,
         'default': 150,
         'valueSpace': 'unbounded'
@@ -181,7 +181,7 @@ export const parameterMatrix = {
         'configFile': true,
         'packageJson': false,
         'mcp': false,
-        'promptable': true,
+        'promptable': false,
         'required': false,
         'default': true,
         'valueSpace': 'bounded'
@@ -358,6 +358,17 @@ export const parameterMatrix = {
         'valueSpace': 'unbounded',
         'schemaValidated': true
     },
+    'capacityReservationArn': {
+        'cliOption': 'capacity-reservation-arn',
+        'envVar': 'ML_CAPACITY_RESERVATION_ARN',
+        'configFile': true,
+        'packageJson': false,
+        'mcp': false,
+        'promptable': false,
+        'required': false,
+        'default': null,
+        'valueSpace': 'unbounded'
+    },
     'icCpuCount': {
         'cliOption': 'ic-cpu-count',
         'envVar': 'ML_IC_CPU_COUNT',

package/src/lib/generated/validation-rules.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-10T13:42:41.011Z
+// Generated: 2026-06-12T22:03:00.468Z
 /**
  * Validation rules derived from parameter-schema-v2.json.
@@ -26,7 +26,7 @@ export const validationRules = {
         return null;
     },
     'instanceType': (value) => {
-        if (!new RegExp('^ml\\.[a-z0-9]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
+        if (!new RegExp('^ml\\.[a-z0-9-]+\\.[a-z0-9]+$').test(value)) return 'instanceType does not match required pattern';
         return null;
     },
     'icGpuCount': (value) => {
@@ -132,6 +132,10 @@ export const validationRules = {
         if (value > 16384) return `endpointVolumeSize must be <= 16384, got ${value}`;
         return null;
     },
+    'capacityReservationArn': (value) => {
+        if (!new RegExp('^arn:aws:sagemaker:').test(value)) return 'capacityReservationArn does not match required pattern';
+        return null;
+    },
     'icCpuCount': (value) => {
         if (value < 0.25) return `icCpuCount must be >= 0.25, got ${value}`;
         if (value > 768) return `icCpuCount must be <= 768, got ${value}`;
@@ -199,4 +203,4 @@ export const validationRules = {
     }
 };
-// 43 parameters have validation rules
+// 44 parameters have validation rules