npm - @aws/ml-container-creator - Versions diffs - 0.3.0 → 0.4.0 - Mend

@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

package/config/bootstrap-stack.json +86 -7
package/config/defaults.json +1 -1
package/package.json +3 -1
package/servers/instance-sizer/index.js +36 -2
package/servers/instance-sizer/lib/instance-ranker.js +114 -10
package/servers/instance-sizer/lib/quota-resolver.js +368 -0
package/servers/instance-sizer/package.json +2 -0
package/servers/lib/catalogs/instances.json +527 -12
package/servers/lib/catalogs/model-servers.json +15 -15
package/servers/lib/catalogs/model-sizes.json +27 -0
package/servers/lib/catalogs/models.json +71 -0
package/servers/lib/schemas/image-catalog.schema.json +9 -1
package/src/app.js +77 -2
package/src/lib/bootstrap-command-handler.js +96 -3
package/src/lib/cli-handler.js +2 -2
package/src/lib/config-manager.js +78 -1
package/src/lib/prompt-runner.js +96 -9
package/src/lib/prompts.js +66 -4
package/src/lib/schema-sync.js +31 -0
package/src/lib/template-manager.js +49 -1
package/src/lib/validate-runner.js +125 -2
package/templates/Dockerfile +10 -2
package/templates/code/cuda_compat.sh +22 -0
package/templates/code/serve +3 -0
package/templates/code/start_server.sh +3 -0
package/templates/diffusors/Dockerfile +2 -1
package/templates/diffusors/serve +3 -0
package/templates/do/README.md +33 -0
package/templates/do/benchmark +646 -0
package/templates/do/clean +86 -0
package/templates/do/config +26 -3
package/templates/do/deploy +6 -1
package/templates/do/register +8 -1
package/templates/triton/Dockerfile +5 -0

package/src/lib/prompt-runner.js CHANGED Viewed

@@ -18,6 +18,7 @@ import {
     modelLoadStrategyPrompts,
     modelProfilePrompts,
     modulePrompts,
+    benchmarkPrompts,
     infraRegionAndTargetPrompts,
     infraInstancePrompts,
     infraAsyncPrompts,
@@ -252,6 +253,29 @@ export default class PromptRunner {
                 this._autoGpuCount = tpRec.gpuCount;
                 console.log(`   ✓ Auto-set tensor parallelism: TP=${tpRec.tensorParallelism} (${tpRec.gpuCount} GPUs)`);
             }
+            // Display capacity type confirmation for selected instance
+            // Requirements: 5.4
+            if (matchingRec && matchingRec.capacityType) {
+                if (matchingRec.capacityType === 'reserved') {
+                    const resType = matchingRec.reservationType === 'capacity-block' ? 'Capacity Block' : 'ODCR';
+                    const endInfo = matchingRec.reservationType === 'capacity-block' && matchingRec.reservationInfo?.endDate
+                        ? `, ends ${new Date(matchingRec.reservationInfo.endDate).toLocaleDateString()}`
+                        : '';
+                    console.log(`   ✓ Using reserved capacity — ${resType} (reservation ${matchingRec.reservationInfo?.reservationId || 'unknown'}${endInfo})`);
+                } else if (matchingRec.capacityType === 'ftp') {
+                    console.log(`   ✓ Using reserved capacity (plan ${matchingRec.ftpInfo?.planName || 'unknown'})`);
+                } else {
+                    const headroom = matchingRec.quotaHeadroom;
+                    console.log(`   ✓ Using on-demand capacity (quota headroom: ${headroom ?? 'unknown'})`);
+                }
+            }
+            // Extract reservation ARN from selected instance for deployment config
+            // Requirements: 2.3
+            if (matchingRec && matchingRec.capacityType === 'reserved' && matchingRec.reservationInfo?.reservationArn) {
+                this._selectedCapacityReservationArn = matchingRec.reservationInfo.reservationArn;
+            }
         }
         // 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
@@ -375,6 +399,21 @@ export default class PromptRunner {
             moduleAnswers.includeSampleModel = false;
         }
+        // Benchmark prompts — derive includeBenchmark from testTypes selection or CLI flag
+        // Requirements: 1.1, 1.2
+        let benchmarkAnswers = {};
+        if (frameworkAnswers.architecture === 'transformers' || frameworkAnswers.architecture === 'diffusors') {
+            const testTypes = moduleAnswers.testTypes || [];
+            const includeBenchmark = testTypes.includes('sagemaker-ai-automated-benchmarking') ||
+                explicitConfig.includeBenchmark === true ||
+                explicitConfig.includeBenchmark === 'true';
+            benchmarkAnswers.includeBenchmark = includeBenchmark;
+            if (includeBenchmark) {
+                const subAnswers = await this._runPhase(benchmarkPrompts, { ...frameworkAnswers, ...moduleAnswers, includeBenchmark }, explicitConfig, existingConfig);
+                benchmarkAnswers = { ...benchmarkAnswers, ...subAnswers };
+            }
+        }
         // Validate instance type against framework requirements (now that framework version is known)
         const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
         if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
@@ -416,6 +455,7 @@ export default class PromptRunner {
             ...hfTokenAnswers,
             ...ngcApiKeyAnswers,
             ...moduleAnswers,
+            ...benchmarkAnswers,
             ...projectAnswers,
             ...destinationAnswers,
             buildTimestamp
@@ -435,6 +475,12 @@ export default class PromptRunner {
             combinedAnswers.artifactUri = this._mcpArtifactUri;
         }
+        // Flow capacity reservation ARN from instance-sizer selection
+        // Requirements: 2.3
+        if (this._selectedCapacityReservationArn) {
+            combinedAnswers.capacityReservationArn = this._selectedCapacityReservationArn;
+        }
         // Validate: non-HF model sources require an artifact URI
         // Without it, the serve script can't download the model at runtime
         // Infer modelSource from model name prefix if not set by MCP
@@ -1036,13 +1082,53 @@ export default class PromptRunner {
                         : '';
                     console.log(`   ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
-                    // Display compact recommendation table
-                    for (const rec of recommendations) {
-                        const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
-                        const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
-                        const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
-                        console.log(`     ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM  ${util.padStart(4)} util${tp}`);
+                    // Check if availability data is present (recommendations have capacityType)
+                    const hasAvailabilityData = recommendations.some(r => r.capacityType);
+                    if (hasAvailabilityData) {
+                        // Group by capacityType for display
+                        const reserved = recommendations.filter(r => r.capacityType === 'reserved' || r.capacityType === 'ftp');
+                        const onDemand = recommendations.filter(r => r.capacityType === 'on-demand');
+                        if (reserved.length > 0) {
+                            console.log('     ── Reserved Capacity ──');
+                            for (const rec of reserved) {
+                                const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
+                                const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
+                                const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
+                                const tag = rec.capacityType === 'reserved'
+                                    ? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
+                                    : ` [FTP] ${rec.ftpInfo?.planName || ''}`;
+                                console.log(`     ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM  ${util.padStart(4)} util${tp}${tag}`);
+                            }
+                        }
+                        if (onDemand.length > 0) {
+                            console.log('     ── On-Demand ──');
+                            for (const rec of onDemand) {
+                                const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
+                                const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
+                                const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
+                                const deployed = rec.quotaDeployed;
+                                const quota = rec.quotaLimit;
+                                const tag = quota !== null && quota !== undefined ? ` [Q:${deployed ?? 0}/${quota}]` : '';
+                                console.log(`     ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM  ${util.padStart(4)} util${tp}${tag}`);
+                            }
+                        }
+                    } else {
+                        // Fallback: display compact recommendation table (no availability data)
+                        for (const rec of recommendations) {
+                            const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
+                            const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
+                            const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
+                            console.log(`     ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM  ${util.padStart(4)} util${tp}`);
+                        }
                     }
+                } else if (parsed.metadata?.allFilteredByQuota) {
+                    // All VRAM-compatible instances had zero quota
+                    console.log('   ⚠️ No quota available for compatible instances. Request a quota increase.');
+                    this._instanceSizerMetadata = parsed.metadata || null;
                 } else if (parsed.metadata?.warning) {
                     console.log(`   ⚠️  ${parsed.metadata.warning}`);
                 } else {
@@ -1972,9 +2058,10 @@ export default class PromptRunner {
         '11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
         '11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
         '12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
-        '12.2': 'al2023-ami-sagemaker-inference-gpu-4-1',
-        '12.4': 'al2023-ami-sagemaker-inference-gpu-4-1',
-        '12.6': 'al2023-ami-sagemaker-inference-gpu-4-1'
+        '12.2': 'al2-ami-sagemaker-inference-gpu-3-1',
+        '12.4': 'al2-ami-sagemaker-inference-gpu-3-1',
+        '12.6': 'al2-ami-sagemaker-inference-gpu-3-1',
+        '13.0': 'al2023-ami-sagemaker-inference-gpu-4-1'
     };
     /**

package/src/lib/prompts.js CHANGED Viewed

@@ -583,7 +583,7 @@ const modulePrompts = [
         type: 'confirm',
         name: 'includeSampleModel',
         message: 'Include sample Abalone classifier?',
-        default: false,
+        default: true,
         when: (answers) => {
             const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
             const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
@@ -622,7 +622,10 @@ const modulePrompts = [
             // Transformers and diffusors support hosted endpoint tests and automated benchmarking; Triton LLM backends only support hosted endpoint tests
             if (architecture === 'transformers') {
-                return ['hosted-model-endpoint'];
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
+            }
+            if (architecture === 'diffusors') {
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
             }
             if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
                 return ['hosted-model-endpoint'];
@@ -635,7 +638,10 @@ const modulePrompts = [
             const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
             if (architecture === 'transformers') {
-                return ['hosted-model-endpoint'];
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
+            }
+            if (architecture === 'diffusors') {
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
             }
             if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
                 return ['hosted-model-endpoint'];
@@ -700,7 +706,12 @@ const infraInstancePrompts = [
         when: answers => answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks',
         message: (answers) => {
             const framework = answers.framework || answers.deploymentConfig?.split('-')[0];
+            // Skip table when MCP sizer already displayed annotated results
+            if (answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 0) {
+                return 'Select instance type:';
+            }
             const table = new Table({
                 head: [
                     chalk.cyan('Instance Type'),
@@ -1110,6 +1121,56 @@ const baseImagePrompts = [
     }
 ];
+/**
+ * Follow-up questions for SageMaker AI Benchmarking (NVIDIA AIPerf).
+ * Asked after 'sagemaker-ai-automated-benchmarking' is selected in testTypes.
+ *
+ * The question definitions are listed without a guard; the shared `when`
+ * predicate is attached to each of them below, so a question is only shown
+ * when `includeBenchmark` is strictly `true` in the answers.
+ * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
+ */
+const benchmarkPrompts = [
+    {
+        type: 'number',
+        name: 'benchmarkConcurrency',
+        message: 'Concurrent requests for benchmark:',
+        default: 10
+    },
+    {
+        type: 'number',
+        name: 'benchmarkInputTokensMean',
+        message: 'Mean input tokens per request:',
+        default: 550
+    },
+    {
+        type: 'number',
+        name: 'benchmarkOutputTokensMean',
+        message: 'Mean output tokens per request:',
+        default: 150
+    },
+    {
+        type: 'confirm',
+        name: 'benchmarkStreaming',
+        message: 'Enable streaming for benchmark?',
+        default: true
+    },
+    {
+        type: 'input',
+        name: 'benchmarkRequestCount',
+        message: 'Total request count (leave empty for service default):',
+        default: ''
+    },
+    {
+        type: 'input',
+        name: 'benchmarkS3OutputPath',
+        message: 'Benchmark results S3 path (leave empty for auto-created bucket):',
+        default: ''
+    }
+].map((question) => ({
+    ...question,
+    when: (answers) => answers.includeBenchmark === true
+}));
 export {
     deploymentConfigPrompts,
     frameworkPrompts, // Deprecated: kept for backward compatibility
@@ -1123,6 +1184,7 @@ export {
     hfTokenPrompts,
     ngcApiKeyPrompts,
     modulePrompts,
+    benchmarkPrompts,
     infrastructurePrompts,
     infraRegionAndTargetPrompts,
     infraInstancePrompts,

package/src/lib/schema-sync.js CHANGED Viewed

@@ -188,6 +188,37 @@ export function loadServiceModel(serviceName, registryPath) {
     return readFileSync(modelPath, 'utf8');
 }
+/**
+ * Report whether the stored SageMaker service model knows about AI Benchmark jobs.
+ *
+ * The model text is loaded from the registry and parsed as JSON. The result is
+ * `available: true` when the model has either a `CreateAIBenchmarkJob` operation
+ * or a `CreateAIBenchmarkJobRequest` shape. Every other outcome (no model text,
+ * unparsable model, neither entry present) yields `available: false` together
+ * with a human-readable `reason`.
+ *
+ * @param {string} [registryPath] - Override registry path; falls back to getRegistryPath()
+ * @returns {{ available: boolean, reason?: string }}
+ */
+export function hasBenchmarkShape(registryPath) {
+    const rawModel = loadServiceModel('sagemaker', registryPath || getRegistryPath());
+    if (!rawModel) {
+        return { available: false, reason: 'SageMaker service model not found in registry' };
+    }
+    let supported;
+    try {
+        // Property access stays inside the try block: a model that parses to a
+        // non-object (e.g. `null`) is reported as a parse failure as well.
+        const parsed = JSON.parse(rawModel);
+        supported = Boolean(
+            parsed.operations?.CreateAIBenchmarkJob || parsed.shapes?.CreateAIBenchmarkJobRequest
+        );
+    } catch {
+        return { available: false, reason: 'Failed to parse SageMaker service model' };
+    }
+    return supported
+        ? { available: true }
+        : { available: false, reason: 'service model does not include AI Benchmark operations' };
+}
 /**
  * Store a service model in the registry.
  * @param {string} serviceName - Service name (e.g., 'sagemaker')

package/src/lib/template-manager.js CHANGED Viewed

@@ -65,7 +65,7 @@ export default class TemplateManager {
             ],
             buildTargets: ['codebuild'],
             deploymentTargets: ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
-            testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
+            testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'],
             awsRegions: [
                 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
                 'eu-west-1', 'eu-west-2', 'eu-central-1', 'eu-north-1',
@@ -134,6 +134,9 @@ export default class TemplateManager {
         // Validate batch transform specific fields
         this._validateBatchTransformConfig();
+        // Validate benchmark specific fields
+        this._validateBenchmarkConfig();
         // Validate instance type format (ml.*.*) - only for realtime-inference
         if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
@@ -297,6 +300,51 @@ export default class TemplateManager {
         }
     }
+    /**
+     * Validates the benchmark-related answers.
+     *
+     * Does nothing unless `this.answers.includeBenchmark` is truthy. Otherwise it checks,
+     * in order: the architecture (the part of `deploymentConfig` before the first '-', or
+     * `architecture` when no deploymentConfig is set), the deployment target, the three
+     * numeric settings (each only when defined) and the optional S3 output path.
+     * The first failing check throws; later checks are not evaluated.
+     * @private
+     * @throws {Error} If benchmark configuration is invalid
+     */
+    _validateBenchmarkConfig() {
+        const answers = this.answers;
+        if (!answers.includeBenchmark) return;
+        // Gate to supported architectures
+        const arch = answers.deploymentConfig
+            ? answers.deploymentConfig.split('-')[0]
+            : answers.architecture;
+        if (arch !== 'transformers' && arch !== 'diffusors') {
+            throw new Error('⚠️  Benchmarking is only supported with transformers and diffusors architectures.');
+        }
+        // Gate to supported deployment targets. The message uses the real target
+        // id 'realtime-inference' so users can copy it into their configuration.
+        if (answers.deploymentTarget === 'hyperpod-eks') {
+            throw new Error('⚠️  Benchmarking is only supported with realtime-inference, async-inference, and batch-transform deployment targets');
+        }
+        // Validate numeric parameters; a setting left undefined is not checked
+        for (const field of ['benchmarkConcurrency', 'benchmarkInputTokensMean', 'benchmarkOutputTokensMean']) {
+            const value = answers[field];
+            if (value !== undefined && (!Number.isInteger(value) || value < 1)) {
+                throw new Error(`⚠️  ${field} must be an integer >= 1`);
+            }
+        }
+        // Validate S3 path format; an empty or whitespace-only path means "not provided"
+        const s3Path = answers.benchmarkS3OutputPath;
+        if (s3Path && s3Path.trim() !== '' && !s3Path.startsWith('s3://')) {
+            throw new Error('⚠️  benchmarkS3OutputPath must start with "s3://". Example: s3://my-bucket/benchmark-results/');
+        }
+    }
     /**
      * Validates GPU instance type requirement for GPU-requiring backends.
      * Called when deploymentConfig is present.

package/src/lib/validate-runner.js CHANGED Viewed

@@ -21,7 +21,7 @@ import SchemaValidationEngine from './schema-validation-engine.js';
 import ServiceModelParser from './service-model-parser.js';
 import CrossCuttingChecker from './cross-cutting-checker.js';
 import HuggingFaceClient from './huggingface-client.js';
-import { getRegistryPath, loadManifest } from './schema-sync.js';
+import { getRegistryPath, loadManifest, hasBenchmarkShape } from './schema-sync.js';
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -52,6 +52,115 @@ export function parseDoConfig(configPath) {
     return config;
 }
+/**
+ * Validate benchmark parameters against service model constraints.
+ * Called when the CreateAIBenchmarkJob shape is available in the synced schema.
+ *
+ * A key is only checked when it is set (not null, undefined or the empty string).
+ * Findings are appended in this order:
+ * - BENCHMARK_CONCURRENCY: integer, min 1
+ * - BENCHMARK_S3_OUTPUT_PATH: string, starts with s3://
+ * - BENCHMARK_JOB_NAME: pattern ^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars
+ * - BENCHMARK_INPUT_TOKENS_MEAN: integer, min 1
+ * - BENCHMARK_OUTPUT_TOKENS_MEAN: integer, min 1
+ *
+ * The S3 path and job name are skipped when they contain a shell substitution
+ * (`$(...)` or `${...}`), because their final value is only known at run time.
+ *
+ * Requirements: 8.1, 8.2, 8.3
+ *
+ * @param {Object} config - Parsed do/config values
+ * @returns {Array<Object>} Array of validation findings (empty when everything is valid)
+ */
+export function validateBenchmarkParams(config) {
+    const findings = [];
+    const isSet = (value) => value !== null && value !== undefined && value !== '';
+    // Both substitution forms count as dynamic, for the S3 path as well as the job name.
+    const isDynamic = (text) => text.includes('$(') || text.includes('${');
+    const addError = (operation, fieldPath, constraint, invalidValue, remediationHint) => {
+        findings.push({ severity: 'error', operation, fieldPath, constraint, invalidValue, remediationHint });
+    };
+    // Shared check for the "integer, min 1" settings; the raw config value is reported back.
+    const requirePositiveInteger = (key, operation, fieldPath) => {
+        const raw = config[key];
+        if (!isSet(raw)) return;
+        const parsed = Number(raw);
+        if (!Number.isInteger(parsed) || parsed < 1) {
+            addError(operation, fieldPath, 'integer >= 1', raw, `${key} must be a positive integer (>= 1)`);
+        }
+    };
+    // Validate Concurrency (integer, min 1)
+    requirePositiveInteger('BENCHMARK_CONCURRENCY', 'CreateAIBenchmarkJob', 'Concurrency');
+    // Validate S3OutputLocation (string, starts with s3://)
+    if (isSet(config.BENCHMARK_S3_OUTPUT_PATH)) {
+        const s3Path = String(config.BENCHMARK_S3_OUTPUT_PATH);
+        if (!isDynamic(s3Path) && !s3Path.startsWith('s3://')) {
+            addError(
+                'CreateAIBenchmarkJob',
+                'OutputConfig.S3OutputLocation',
+                'must start with s3://',
+                config.BENCHMARK_S3_OUTPUT_PATH,
+                'BENCHMARK_S3_OUTPUT_PATH must start with "s3://". Example: s3://my-bucket/benchmark-results/'
+            );
+        }
+    }
+    // Validate AIBenchmarkJobName pattern (^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars)
+    if (isSet(config.BENCHMARK_JOB_NAME)) {
+        const jobName = String(config.BENCHMARK_JOB_NAME);
+        if (!isDynamic(jobName)) {
+            // The length check comes first, so an over-long name yields one finding only.
+            if (jobName.length > 63) {
+                addError(
+                    'CreateAIBenchmarkJob',
+                    'AIBenchmarkJobName',
+                    'max 63 characters',
+                    config.BENCHMARK_JOB_NAME,
+                    'AIBenchmarkJobName must be at most 63 characters'
+                );
+            } else if (!/^[a-zA-Z0-9](-*[a-zA-Z0-9])*$/.test(jobName)) {
+                addError(
+                    'CreateAIBenchmarkJob',
+                    'AIBenchmarkJobName',
+                    'pattern: ^[a-zA-Z0-9](-*[a-zA-Z0-9])*',
+                    config.BENCHMARK_JOB_NAME,
+                    'AIBenchmarkJobName must start with alphanumeric and contain only alphanumeric characters and hyphens'
+                );
+            }
+        }
+    }
+    // Validate input tokens mean (integer, min 1)
+    requirePositiveInteger(
+        'BENCHMARK_INPUT_TOKENS_MEAN',
+        'CreateAIWorkloadConfig',
+        'WorkloadSpec.parameters.prompt_input_tokens_mean'
+    );
+    // Validate output tokens mean (integer, min 1)
+    requirePositiveInteger(
+        'BENCHMARK_OUTPUT_TOKENS_MEAN',
+        'CreateAIWorkloadConfig',
+        'WorkloadSpec.parameters.output_tokens_mean'
+    );
+    return findings;
+}
 /**
  * Run the full validation pipeline.
  *
@@ -171,6 +280,20 @@ export async function run(options = {}) {
         }
     }
+    // Run benchmark parameter validation (Requirements 8.1, 8.2, 8.3)
+    if (config.BENCHMARK_CONCURRENCY || config.BENCHMARK_INPUT_TOKENS_MEAN ||
+        config.BENCHMARK_OUTPUT_TOKENS_MEAN || config.BENCHMARK_S3_OUTPUT_PATH) {
+        const benchmarkCheck = hasBenchmarkShape(registryPath);
+        if (benchmarkCheck.available) {
+            const benchmarkFindings = validateBenchmarkParams(config);
+            for (const finding of benchmarkFindings) {
+                report.addFinding(finding);
+            }
+        } else {
+            console.log('⚠️  Benchmark validation skipped: service model does not include AI Benchmark operations. Run `bootstrap sync-schemas` to update.');
+        }
+    }
     const summary = report.getSummary();
     // Load manifest for version info
@@ -213,4 +336,4 @@ export async function run(options = {}) {
     return exitCode;
 }
-export default { run, parseDoConfig };
+export default { run, parseDoConfig, validateBenchmarkParams };

package/templates/Dockerfile CHANGED Viewed

@@ -12,6 +12,9 @@
 <% if (framework !== 'transformers') { %>
 FROM <%= baseImage || 'public.ecr.aws/docker/library/python:3.12-slim' %>
+# Ensure Python output is unbuffered so SageMaker can capture logs in CloudWatch
+ENV PYTHONUNBUFFERED=1
 # Set a docker label to name this project, postpended with the build time
 LABEL project.name="<%= projectName %>-<%= buildTimestamp %>" \
       project.base-name="<%= projectName %>" \
@@ -143,6 +146,9 @@ ARG BASE_IMAGE=<%= baseImage || 'deepjavalibrary/djl-serving:0.36.0-pytorch-gpu'
 FROM ${BASE_IMAGE}
+# Ensure Python output is unbuffered so SageMaker can capture logs in CloudWatch
+ENV PYTHONUNBUFFERED=1
 <% if (comments && comments.chatTemplate) { %>
 <%= comments.chatTemplate %>
 <% } %>
@@ -271,8 +277,9 @@ COPY code/serve /usr/bin/serve_trtllm
 RUN chmod +x /usr/bin/serve_trtllm
 # Copy startup script
+COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
 COPY code/start_server.sh /usr/bin/start_server.sh
-RUN chmod +x /usr/bin/start_server.sh
+RUN chmod +x /usr/bin/start_server.sh /usr/bin/cuda_compat.sh
 ENTRYPOINT [ "/usr/bin/start_server.sh" ]
 <% } else if (modelServer === 'lmi' || modelServer === 'djl') { %>
@@ -287,8 +294,9 @@ COPY code/serving.properties /opt/ml/model/serving.properties
 # LMI/DJL containers use their own entrypoint
 # The container will automatically start DJL Serving with the configuration
 <% } else { %>
+COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
 COPY code/serve /usr/bin/serve
-RUN chmod 777 /usr/bin/serve
+RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
 <% if (comments && comments.troubleshooting) { %>
 <%= comments.troubleshooting %>

package/templates/code/cuda_compat.sh ADDED Viewed

@@ -0,0 +1,22 @@
+#!/bin/bash
+# CUDA Compatibility Setup
+# Required for SageMaker inference AMIs using NVIDIA Container Toolkit 1.17.4+
+# (al2-ami-sagemaker-inference-gpu-2-1, al2-ami-sagemaker-inference-gpu-3-1,
+#  al2023-ami-sagemaker-inference-gpu-4-1)
+#
+# These AMIs no longer auto-mount CUDA compat libraries. When the compat
+# libcuda is present, this script compares the driver version reported in
+# /proc/driver/nvidia/version with the version taken from the compat
+# libcuda.so.1 symlink target, and prepends /usr/local/cuda/compat to
+# LD_LIBRARY_PATH when the driver version is the lower of the two.
+
+# _verlt A B: succeeds only when version A sorts strictly before version B.
+_verlt() {
+    if [ "$1" = "$2" ]; then
+        return 1
+    fi
+    [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
+}
+
+if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then
+    # Third dot-separated field onwards of the symlink target is the version.
+    CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' -f 3-)
+    # Stays empty when the driver version file is missing or has no NVRM line.
+    NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
+    if [ -z "$NVIDIA_DRIVER_VERSION" ]; then
+        : # no driver version detected, leave LD_LIBRARY_PATH untouched
+    elif _verlt "$NVIDIA_DRIVER_VERSION" "$CUDA_COMPAT_MAX_DRIVER_VERSION"; then
+        echo "CUDA compat: driver ${NVIDIA_DRIVER_VERSION} < ${CUDA_COMPAT_MAX_DRIVER_VERSION}, adding compat libs"
+        export LD_LIBRARY_PATH=/usr/local/cuda/compat:${LD_LIBRARY_PATH:-}
+    fi
+fi

package/templates/code/serve CHANGED Viewed

@@ -2,6 +2,9 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CUDA compatibility setup (required for newer SageMaker inference AMIs)
+source /usr/bin/cuda_compat.sh 2>/dev/null || true
 <% if (modelServer === 'vllm') { %>
 echo "Starting vLLM server"
 <% } else if (modelServer === 'sglang') { %>

package/templates/code/start_server.sh CHANGED Viewed

@@ -2,6 +2,9 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CUDA compatibility setup (required for newer SageMaker inference AMIs)
+source /usr/bin/cuda_compat.sh 2>/dev/null || true
 set -e
 echo "Starting TensorRT-LLM server on port 8081..."

package/templates/diffusors/Dockerfile CHANGED Viewed

@@ -59,8 +59,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends nginx \
 COPY nginx-diffusors.conf /etc/nginx/nginx.conf
 # Copy serve entrypoint and startup scripts
+COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
 COPY code/serve /usr/bin/serve
-RUN chmod 777 /usr/bin/serve
+RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
 COPY code/start_server.sh /usr/bin/start_server.sh
 RUN chmod +x /usr/bin/start_server.sh

package/templates/diffusors/serve CHANGED Viewed

@@ -2,6 +2,9 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CUDA compatibility setup (required for newer SageMaker inference AMIs)
+source /usr/bin/cuda_compat.sh 2>/dev/null || true
 echo "Starting vLLM-Omni server (diffusion model serving)"
 # Resolve model URI prefixes that engines cannot handle natively.