npm - @aws/ml-container-creator - Versions diffs - 0.2.6 → 0.4.0 - Mend

@aws/ml-container-creator 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/bin/cli.js +38 -2
package/config/bootstrap-stack.json +94 -1
package/config/defaults.json +1 -1
package/infra/ci-harness/package-lock.json +22 -9
package/package.json +3 -1
package/servers/instance-sizer/index.js +45 -8
package/servers/instance-sizer/lib/instance-ranker.js +140 -11
package/servers/instance-sizer/lib/model-resolver.js +10 -6
package/servers/instance-sizer/lib/quota-resolver.js +368 -0
package/servers/instance-sizer/package.json +2 -0
package/servers/lib/catalogs/instances.json +527 -12
package/servers/lib/catalogs/model-servers.json +298 -20
package/servers/lib/catalogs/model-sizes.json +27 -0
package/servers/lib/catalogs/models.json +101 -0
package/servers/lib/schemas/image-catalog.schema.json +15 -1
package/servers/model-picker/index.js +2 -1
package/src/app.js +96 -2
package/src/lib/architecture-sync.js +171 -0
package/src/lib/arn-detection.js +22 -0
package/src/lib/bootstrap-command-handler.js +178 -3
package/src/lib/cli-handler.js +2 -2
package/src/lib/config-manager.js +121 -1
package/src/lib/cross-cutting-checker.js +119 -0
package/src/lib/deployment-entry-schema.js +1 -2
package/src/lib/prompt-runner.js +514 -20
package/src/lib/prompts.js +67 -5
package/src/lib/registry-command-handler.js +236 -0
package/src/lib/schema-sync.js +31 -0
package/src/lib/secret-classification.js +56 -0
package/src/lib/secrets-command-handler.js +550 -0
package/src/lib/template-manager.js +49 -1
package/src/lib/validate-runner.js +174 -2
package/src/lib/validation-report.js +8 -1
package/src/prompt-adapter.js +3 -2
package/templates/Dockerfile +10 -2
package/templates/code/cuda_compat.sh +22 -0
package/templates/code/serve +3 -0
package/templates/code/start_server.sh +3 -0
package/templates/diffusors/Dockerfile +2 -1
package/templates/diffusors/serve +3 -0
package/templates/do/README.md +33 -0
package/templates/do/benchmark +646 -0
package/templates/do/build +22 -0
package/templates/do/clean +86 -0
package/templates/do/config +41 -6
package/templates/do/deploy +66 -6
package/templates/do/logs +18 -3
package/templates/do/register +8 -1
package/templates/do/run +10 -0
package/templates/triton/Dockerfile +5 -0

package/src/lib/validate-runner.js CHANGED Viewed

@@ -15,10 +15,16 @@
 import { existsSync, readFileSync, readdirSync } from 'node:fs';
 import path from 'node:path';
+import { fileURLToPath } from 'node:url';
 import PayloadBuilder from './payload-builder.js';
 import SchemaValidationEngine from './schema-validation-engine.js';
 import ServiceModelParser from './service-model-parser.js';
-import { getRegistryPath, loadManifest } from './schema-sync.js';
+import CrossCuttingChecker from './cross-cutting-checker.js';
+import HuggingFaceClient from './huggingface-client.js';
+import { getRegistryPath, loadManifest, hasBenchmarkShape } from './schema-sync.js';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
 /**
  * Parse a do/config shell file into a key-value object.
@@ -46,6 +52,115 @@ export function parseDoConfig(configPath) {
     return config;
 }
+/**
+ * Validate benchmark parameters against service model constraints.
+ * Called when the CreateAIBenchmarkJob shape is available in the synced schema.
+ *
+ * Each key is checked only when it is present and not null, undefined, or the
+ * empty string; absent keys produce no finding.
+ *
+ * Validates:
+ * - BENCHMARK_CONCURRENCY (Concurrency): integer, min 1
+ * - BENCHMARK_S3_OUTPUT_PATH (OutputConfig.S3OutputLocation): string, starts with s3://
+ *   (values containing "$(" are skipped as dynamic shell expressions)
+ * - BENCHMARK_JOB_NAME (AIBenchmarkJobName): pattern ^[a-zA-Z0-9](-*[a-zA-Z0-9])*$, max 63 chars
+ *   (values containing "$(" or "${" are skipped; the length check runs first, so an
+ *   over-long name yields only the length finding)
+ * - BENCHMARK_INPUT_TOKENS_MEAN (WorkloadSpec.parameters.prompt_input_tokens_mean): integer, min 1
+ * - BENCHMARK_OUTPUT_TOKENS_MEAN (WorkloadSpec.parameters.output_tokens_mean): integer, min 1
+ *
+ * The two token-mean findings are reported against the CreateAIWorkloadConfig
+ * operation; the other findings are reported against CreateAIBenchmarkJob.
+ *
+ * Numeric values are converted with Number() before the integer test, so any
+ * value that Number() converts to an integer >= 1 is accepted.
+ *
+ * NOTE(review): the S3 path check skips only "$(" while the job-name check also
+ * skips "${" - a path built from a "${VAR}" expansion is flagged unless it
+ * literally starts with the s3 scheme. Confirm whether that asymmetry is intended.
+ * NOTE(review): the string checks call .includes() and .startsWith() directly, so
+ * they assume config values are strings - confirm against parseDoConfig.
+ *
+ * Requirements: 8.1, 8.2, 8.3
+ *
+ * @param {Object} config - Parsed do/config values
+ * @returns {Array<Object>} Array of validation findings; each finding carries
+ *   severity, operation, fieldPath, constraint, invalidValue and remediationHint.
+ *   Empty when no checked value is invalid.
+ */
+export function validateBenchmarkParams(config) {
+    const findings = [];
+    // Validate Concurrency (integer, min 1)
+    if (config.BENCHMARK_CONCURRENCY !== null && config.BENCHMARK_CONCURRENCY !== undefined && config.BENCHMARK_CONCURRENCY !== '') {
+        const concurrency = Number(config.BENCHMARK_CONCURRENCY);
+        if (!Number.isInteger(concurrency) || concurrency < 1) {
+            findings.push({
+                severity: 'error',
+                operation: 'CreateAIBenchmarkJob',
+                fieldPath: 'Concurrency',
+                constraint: 'integer >= 1',
+                invalidValue: config.BENCHMARK_CONCURRENCY,
+                remediationHint: 'BENCHMARK_CONCURRENCY must be a positive integer (>= 1)'
+            });
+        }
+    }
+    // Validate S3OutputLocation (string, starts with s3://)
+    if (config.BENCHMARK_S3_OUTPUT_PATH !== null && config.BENCHMARK_S3_OUTPUT_PATH !== undefined && config.BENCHMARK_S3_OUTPUT_PATH !== '') {
+        const s3Path = config.BENCHMARK_S3_OUTPUT_PATH;
+        // Skip dynamic shell expressions (e.g., s3://...$(aws ...))
+        if (!s3Path.includes('$(') && !s3Path.startsWith('s3://')) {
+            findings.push({
+                severity: 'error',
+                operation: 'CreateAIBenchmarkJob',
+                fieldPath: 'OutputConfig.S3OutputLocation',
+                constraint: 'must start with s3://',
+                invalidValue: s3Path,
+                remediationHint: 'BENCHMARK_S3_OUTPUT_PATH must start with "s3://". Example: s3://my-bucket/benchmark-results/'
+            });
+        }
+    }
+    // Validate AIBenchmarkJobName pattern (^[a-zA-Z0-9](-*[a-zA-Z0-9])*, max 63 chars)
+    if (config.BENCHMARK_JOB_NAME !== null && config.BENCHMARK_JOB_NAME !== undefined && config.BENCHMARK_JOB_NAME !== '') {
+        const jobName = config.BENCHMARK_JOB_NAME;
+        // Skip dynamic shell expressions
+        if (!jobName.includes('$(') && !jobName.includes('${')) {
+            const namePattern = /^[a-zA-Z0-9](-*[a-zA-Z0-9])*$/;
+            if (jobName.length > 63) {
+                findings.push({
+                    severity: 'error',
+                    operation: 'CreateAIBenchmarkJob',
+                    fieldPath: 'AIBenchmarkJobName',
+                    constraint: 'max 63 characters',
+                    invalidValue: jobName,
+                    remediationHint: 'AIBenchmarkJobName must be at most 63 characters'
+                });
+            } else if (!namePattern.test(jobName)) {
+                findings.push({
+                    severity: 'error',
+                    operation: 'CreateAIBenchmarkJob',
+                    fieldPath: 'AIBenchmarkJobName',
+                    constraint: 'pattern: ^[a-zA-Z0-9](-*[a-zA-Z0-9])*',
+                    invalidValue: jobName,
+                    remediationHint: 'AIBenchmarkJobName must start with alphanumeric and contain only alphanumeric characters and hyphens'
+                });
+            }
+        }
+    }
+    // Validate input tokens mean (integer, min 1)
+    if (config.BENCHMARK_INPUT_TOKENS_MEAN !== null && config.BENCHMARK_INPUT_TOKENS_MEAN !== undefined && config.BENCHMARK_INPUT_TOKENS_MEAN !== '') {
+        const inputTokens = Number(config.BENCHMARK_INPUT_TOKENS_MEAN);
+        if (!Number.isInteger(inputTokens) || inputTokens < 1) {
+            findings.push({
+                severity: 'error',
+                operation: 'CreateAIWorkloadConfig',
+                fieldPath: 'WorkloadSpec.parameters.prompt_input_tokens_mean',
+                constraint: 'integer >= 1',
+                invalidValue: config.BENCHMARK_INPUT_TOKENS_MEAN,
+                remediationHint: 'BENCHMARK_INPUT_TOKENS_MEAN must be a positive integer (>= 1)'
+            });
+        }
+    }
+    // Validate output tokens mean (integer, min 1)
+    if (config.BENCHMARK_OUTPUT_TOKENS_MEAN !== null && config.BENCHMARK_OUTPUT_TOKENS_MEAN !== undefined && config.BENCHMARK_OUTPUT_TOKENS_MEAN !== '') {
+        const outputTokens = Number(config.BENCHMARK_OUTPUT_TOKENS_MEAN);
+        if (!Number.isInteger(outputTokens) || outputTokens < 1) {
+            findings.push({
+                severity: 'error',
+                operation: 'CreateAIWorkloadConfig',
+                fieldPath: 'WorkloadSpec.parameters.output_tokens_mean',
+                constraint: 'integer >= 1',
+                invalidValue: config.BENCHMARK_OUTPUT_TOKENS_MEAN,
+                remediationHint: 'BENCHMARK_OUTPUT_TOKENS_MEAN must be a positive integer (>= 1)'
+            });
+        }
+    }
+    return findings;
+}
 /**
  * Run the full validation pipeline.
  *
@@ -122,6 +237,63 @@ export async function run(options = {}) {
     });
     const report = await engine.validate(context);
+    // Run model architecture compatibility check (Requirement 5.1-5.2)
+    if (config.MODEL_NAME) {
+        try {
+            const catalogPath = path.resolve(__dirname, '../../servers/lib/catalogs/model-servers.json');
+            if (existsSync(catalogPath)) {
+                const modelServersCatalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
+                // Fetch model's config.json from HuggingFace to get model_type
+                const hfClient = new HuggingFaceClient({ timeout: 10000 });
+                const modelConfig = await hfClient.fetchModelConfig(config.MODEL_NAME);
+                const modelType = modelConfig?.model_type || null;
+                if (modelType) {
+                    // Extract baseImageVersion from BASE_IMAGE (e.g., "vllm/vllm-openai:v0.10.1" → "v0.10.1")
+                    const baseImage = config.BASE_IMAGE || '';
+                    const baseImageVersion = baseImage.includes(':') ? baseImage.split(':').pop() : '';
+                    // Strip leading 'v' to match catalog's framework_version format (e.g., "v0.10.1" → "0.10.1")
+                    const frameworkVersion = baseImageVersion.replace(/^v/, '');
+                    const modelServer = config.MODEL_SERVER || '';
+                    // Build context fields for the architecture checker
+                    const archContext = {
+                        config: {
+                            modelType,
+                            modelServer,
+                            baseImageVersion: frameworkVersion
+                        }
+                    };
+                    const checker = new CrossCuttingChecker();
+                    const archFindings = checker.checkModelArchitectureCompatibility(archContext, modelServersCatalog);
+                    for (const finding of archFindings) {
+                        report.addFinding(finding);
+                    }
+                }
+            }
+        } catch {
+            // Graceful degradation: if architecture check fails, continue without it
+        }
+    }
+    // Run benchmark parameter validation (Requirements 8.1, 8.2, 8.3)
+    if (config.BENCHMARK_CONCURRENCY || config.BENCHMARK_INPUT_TOKENS_MEAN ||
+        config.BENCHMARK_OUTPUT_TOKENS_MEAN || config.BENCHMARK_S3_OUTPUT_PATH) {
+        const benchmarkCheck = hasBenchmarkShape(registryPath);
+        if (benchmarkCheck.available) {
+            const benchmarkFindings = validateBenchmarkParams(config);
+            for (const finding of benchmarkFindings) {
+                report.addFinding(finding);
+            }
+        } else {
+            console.log('⚠️  Benchmark validation skipped: service model does not include AI Benchmark operations. Run `bootstrap sync-schemas` to update.');
+        }
+    }
     const summary = report.getSummary();
     // Load manifest for version info
@@ -164,4 +336,4 @@ export async function run(options = {}) {
     return exitCode;
 }
-export default { run, parseDoConfig };
+export default { run, parseDoConfig, validateBenchmarkParams };

package/src/lib/validation-report.js CHANGED Viewed

@@ -22,7 +22,14 @@ export default class ValidationReport {
         const source = finding.source || '';
         if (source === 'cross-cutting') {
-            this.crossCuttingErrors.push(finding);
+            // Cross-cutting findings with medium/low confidence are advisory, not errors
+            if (finding.confidence === 'medium' || finding.confidence === 'low') {
+                this.advisoryFindings.push(finding);
+            } else if (finding.severity === 'warning') {
+                this.warnings.push(finding);
+            } else {
+                this.crossCuttingErrors.push(finding);
+            }
         } else if (source === 'smart-mode' || source.startsWith('smart:')) {
             // Smart-mode findings are advisory UNLESS confidence is definitive AND severity is error
             if (finding.confidence === 'definitive' && finding.severity === 'error') {

package/src/prompt-adapter.js CHANGED Viewed

@@ -1,12 +1,12 @@
 // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 // SPDX-License-Identifier: Apache-2.0
-import { select, input, confirm, checkbox, number, Separator } from '@inquirer/prompts';
+import { select, input, confirm, checkbox, number, password, Separator } from '@inquirer/prompts';
 /**
  * Maps Yeoman prompt type names to @inquirer/prompts runner functions.
  */
-const runners = { list: select, select, input, confirm, checkbox, number };
+const runners = { list: select, select, input, confirm, checkbox, number, password };
 /**
  * Runs a sequence of Yeoman-style prompt definitions using @inquirer/prompts.
@@ -55,6 +55,7 @@ export async function runPrompts(prompts, previousAnswers = {}, options = {}) {
         if (mappedChoices !== undefined) config.choices = mappedChoices;
         if (defaultVal !== undefined) config.default = defaultVal;
         if (prompt.validate) config.validate = prompt.validate;
+        if (prompt.mask !== undefined) config.mask = prompt.mask;
         answers[prompt.name] = await runner(config);
     }

package/templates/Dockerfile CHANGED Viewed

@@ -12,6 +12,9 @@
 <% if (framework !== 'transformers') { %>
 FROM <%= baseImage || 'public.ecr.aws/docker/library/python:3.12-slim' %>
+# Ensure Python output is unbuffered so SageMaker can capture logs in CloudWatch
+ENV PYTHONUNBUFFERED=1
 # Set a docker label to name this project, postpended with the build time
 LABEL project.name="<%= projectName %>-<%= buildTimestamp %>" \
       project.base-name="<%= projectName %>" \
@@ -143,6 +146,9 @@ ARG BASE_IMAGE=<%= baseImage || 'deepjavalibrary/djl-serving:0.36.0-pytorch-gpu'
 FROM ${BASE_IMAGE}
+# Ensure Python output is unbuffered so SageMaker can capture logs in CloudWatch
+ENV PYTHONUNBUFFERED=1
 <% if (comments && comments.chatTemplate) { %>
 <%= comments.chatTemplate %>
 <% } %>
@@ -271,8 +277,9 @@ COPY code/serve /usr/bin/serve_trtllm
 RUN chmod +x /usr/bin/serve_trtllm
 # Copy startup script
+COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
 COPY code/start_server.sh /usr/bin/start_server.sh
-RUN chmod +x /usr/bin/start_server.sh
+RUN chmod +x /usr/bin/start_server.sh /usr/bin/cuda_compat.sh
 ENTRYPOINT [ "/usr/bin/start_server.sh" ]
 <% } else if (modelServer === 'lmi' || modelServer === 'djl') { %>
@@ -287,8 +294,9 @@ COPY code/serving.properties /opt/ml/model/serving.properties
 # LMI/DJL containers use their own entrypoint
 # The container will automatically start DJL Serving with the configuration
 <% } else { %>
+COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
 COPY code/serve /usr/bin/serve
-RUN chmod 777 /usr/bin/serve
+RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
 <% if (comments && comments.troubleshooting) { %>
 <%= comments.troubleshooting %>

package/templates/code/cuda_compat.sh ADDED Viewed

@@ -0,0 +1,22 @@
+#!/bin/bash
+# CUDA Compatibility Setup
+# Required for SageMaker inference AMIs using NVIDIA Container Toolkit 1.17.4+
+# (al2-ami-sagemaker-inference-gpu-2-1, al2-ami-sagemaker-inference-gpu-3-1,
+#  al2023-ami-sagemaker-inference-gpu-4-1)
+#
+# These AMIs no longer auto-mount CUDA compat libraries. When sourced, this script
+# compares the host driver version (/proc/driver/nvidia/version) with the version in
+# the compat libcuda.so.1 symlink target; if older, it prepends compat to LD_LIBRARY_PATH.
+_verlt() {
+    [ "$1" = "$2" ] && return 1 || [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ]
+}
+if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then
+    CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d'.' -f 3-)
+    NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
+    if [ -n "$NVIDIA_DRIVER_VERSION" ] && _verlt "$NVIDIA_DRIVER_VERSION" "$CUDA_COMPAT_MAX_DRIVER_VERSION"; then
+        echo "CUDA compat: driver ${NVIDIA_DRIVER_VERSION} < ${CUDA_COMPAT_MAX_DRIVER_VERSION}, adding compat libs"
+        export LD_LIBRARY_PATH=/usr/local/cuda/compat:${LD_LIBRARY_PATH:-}
+    fi
+fi

package/templates/code/serve CHANGED Viewed

@@ -2,6 +2,9 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CUDA compatibility setup (required for newer SageMaker inference AMIs)
+source /usr/bin/cuda_compat.sh 2>/dev/null || true
 <% if (modelServer === 'vllm') { %>
 echo "Starting vLLM server"
 <% } else if (modelServer === 'sglang') { %>

package/templates/code/start_server.sh CHANGED Viewed

@@ -2,6 +2,9 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CUDA compatibility setup (required for newer SageMaker inference AMIs)
+source /usr/bin/cuda_compat.sh 2>/dev/null || true
 set -e
 echo "Starting TensorRT-LLM server on port 8081..."

package/templates/diffusors/Dockerfile CHANGED Viewed

@@ -59,8 +59,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends nginx \
 COPY nginx-diffusors.conf /etc/nginx/nginx.conf
 # Copy serve entrypoint and startup scripts
+COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
 COPY code/serve /usr/bin/serve
-RUN chmod 777 /usr/bin/serve
+RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh
 COPY code/start_server.sh /usr/bin/start_server.sh
 RUN chmod +x /usr/bin/start_server.sh

package/templates/diffusors/serve CHANGED Viewed

@@ -2,6 +2,9 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CUDA compatibility setup (required for newer SageMaker inference AMIs)
+source /usr/bin/cuda_compat.sh 2>/dev/null || true
 echo "Starting vLLM-Omni server (diffusion model serving)"
 # Resolve model URI prefixes that engines cannot handle natively.

package/templates/do/README.md CHANGED Viewed

@@ -262,6 +262,39 @@ Clean everything:
 ---
+<% if (typeof includeBenchmark !== 'undefined' && includeBenchmark) { %>
+### `./do/benchmark`
+Run SageMaker AI Benchmark against deployed endpoint.
+**What it does:**
+- Verifies endpoint is InService
+- Ensures S3 output bucket exists
+- Creates AI workload configuration
+- Creates and monitors AI benchmark job
+- Displays performance results (throughput, latency P50/P90/P99, TTFT, ITL)
+**Prerequisites:**
+- Endpoint deployed and InService (`./do/deploy`)
+- AWS credentials configured
+**Usage:**
+```bash
+./do/benchmark
+```
+**Clean up benchmark resources:**
+```bash
+./do/benchmark --clean
+```
+**Output:**
+- Benchmark results summary table
+- Detailed results in S3
+---
+<% } %>
 <% if (buildTarget === 'codebuild') { %>
 ### `./do/submit`