npm - @aws/ml-container-creator - Versions diffs - 0.6.1 → 0.8.0 - Mend

@aws/ml-container-creator 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/bin/cli.js +1 -1
package/infra/ci-harness/buildspec.yml +4 -0
package/package.json +1 -1
package/servers/lib/catalogs/model-servers.json +80 -0
package/servers/model-picker/index.js +27 -16
package/src/app.js +89 -21
package/src/lib/cli-handler.js +1 -1
package/src/lib/config-manager.js +39 -2
package/src/lib/cross-cutting-checker.js +146 -33
package/src/lib/deployment-config-resolver.js +10 -4
package/src/lib/e2e-bootstrap.js +227 -0
package/src/lib/e2e-catalog-validator.js +103 -0
package/src/lib/e2e-quota-validator.js +135 -0
package/src/lib/prompt-runner.js +290 -22
package/src/lib/prompts.js +9 -3
package/src/lib/template-manager.js +10 -4
package/src/lib/tune-catalog-validator.js +5 -5
package/templates/Dockerfile +2 -0
package/templates/code/cw_log_forwarder.py +64 -0
package/templates/code/serve +14 -3
package/templates/code/serving.properties +2 -2
package/templates/deploy_notebook_generator.py +897 -0
package/templates/diffusors/serve +3 -3
package/templates/do/.tune_helper.py +2 -2
package/templates/do/export +19 -2
package/templates/do/lib/endpoint-config.sh +3 -1
package/templates/do/lib/inference-component.sh +5 -1
package/templates/do/register +8 -2
package/templates/do/test +5 -5
package/templates/do/tune +2 -2
package/templates/marketplace/config +118 -0
package/templates/marketplace/deploy +890 -0
package/templates/marketplace/test +453 -0

package/src/lib/prompt-runner.js CHANGED Viewed

@@ -111,6 +111,14 @@ export default class PromptRunner {
             framework: framework || deploymentConfigAnswers.framework,
             modelServer: modelServer || deploymentConfigAnswers.modelServer
         };
+        // ──────────────────────────────────────────────────────────────────────
+        // Marketplace fast-path: skip all container-related prompts
+        // Requirements: 2.3, 2.4, 2.5
+        // ──────────────────────────────────────────────────────────────────────
+        if (frameworkAnswers.architecture === 'marketplace') {
+            return this._runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp);
+        }
         // Engine prompt for http architecture
         const engineAnswers = await this._runPhase(enginePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
@@ -596,13 +604,27 @@ export default class PromptRunner {
         // Infer modelSource from model name prefix if not set by MCP
         const modelName = combinedAnswers.customModelName || combinedAnswers.modelName;
         if (!combinedAnswers.modelSource && modelName) {
-            if (modelName.startsWith('s3://')) {
+            // Reject deprecated JumpStart prefixes with migration message
+            if (modelName.startsWith('jumpstart://') || modelName.startsWith('jumpstart-hub://')) {
+                const bareId = modelName.replace(/^jumpstart(-hub)?:\/\//, '');
+                console.error(`\n   ⚠️  JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`);
+                console.error('   JumpStart model sources have been removed. Use one of:');
+                console.error('     • HuggingFace model ID (e.g., meta-llama/Llama-2-7b-hf)');
+                console.error('     • s3://bucket/path/model.tar.gz');
+                console.error('     • registry://model-package-name');
+                console.error('     • marketplace://arn:aws:sagemaker:...\n');
+                process.exit(1);
+            }
+            if (modelName.startsWith('marketplace://')) {
+                // marketplace://arn:aws:sagemaker:... → set architecture to marketplace and store ARN
+                const arn = modelName.replace(/^marketplace:\/\//, '');
+                combinedAnswers.modelPackageArn = arn;
+                combinedAnswers.architecture = 'marketplace';
+                combinedAnswers.deploymentConfig = 'marketplace';
+                combinedAnswers.modelSource = undefined;
+            } else if (modelName.startsWith('s3://')) {
                 combinedAnswers.modelSource = 's3';
                 combinedAnswers.artifactUri = modelName;
-            } else if (modelName.startsWith('jumpstart://')) {
-                combinedAnswers.modelSource = 'jumpstart';
-            } else if (modelName.startsWith('jumpstart-hub://')) {
-                combinedAnswers.modelSource = 'jumpstart-hub';
             } else if (modelName.startsWith('registry://')) {
                 combinedAnswers.modelSource = 'registry';
             }
@@ -613,7 +635,7 @@ export default class PromptRunner {
                 combinedAnswers.artifactUri = modelName;
             }
         }
-        const downloadSources = ['jumpstart', 's3'];
+        const downloadSources = ['s3'];
         if (downloadSources.includes(combinedAnswers.modelSource) && !combinedAnswers.artifactUri) {
             console.log(`\n   ⚠️  Model source is '${combinedAnswers.modelSource}' but no artifact URI was resolved.`);
             console.log('   The model-picker could not determine the download location.');
@@ -638,18 +660,7 @@ export default class PromptRunner {
             }
         }
-        // Warn about jumpstart-hub:// models — private hub deployment requires
-        // HubAccessConfig on CreateModel, which is not yet supported by the generator.
-        if (combinedAnswers.modelSource === 'jumpstart-hub') {
-            console.log('\n   ⚠️  JumpStart Private Hub models are not yet fully supported.');
-            console.log('   Private hub artifacts live in AWS-managed S3 buckets that require');
-            console.log('   SageMaker\'s HubAccessConfig mechanism for access.');
-            console.log('   The generated project will not be able to download model artifacts at runtime.');
-            console.log('   This feature is tracked for a future release.\n');
-            console.log('   Falling back to HuggingFace source.\n');
-            combinedAnswers.modelSource = 'huggingface';
-            delete combinedAnswers.artifactUri;
-        }
         // Apply auto-set model format for Triton backends with single format
         // Requirements: 3.3, 3.4, 3.5
@@ -731,6 +742,265 @@ export default class PromptRunner {
         return combinedAnswers;
     }
+    /**
+     * Marketplace-specific prompt flow.
+     * Skips all container-related prompts (framework, model server, base image, CUDA version)
+     * and prompts only for: model package ARN, instance type, deployment target, region.
+     *
+     * Requirements: 2.3, 2.4, 2.5
+     * @private
+     */
+    async _runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp) {
+        console.log('\n🏪 Marketplace Model Package Configuration');
+        // Query marketplace-picker MCP server for subscription discovery
+        // Requirements: 2.4, 6.1, 6.2
+        let mcpSubscriptions = [];
+        const cm = this.configManager;
+        if (cm && cm.getMcpServerNames && cm.getMcpServerNames().includes('marketplace-picker')) {
+            try {
+                console.log('   🔍 Querying marketplace-picker for subscriptions...');
+                const result = await cm.queryMcpServer('marketplace-picker', {
+                    region: explicitConfig.awsRegion || existingConfig.awsRegion || process.env.AWS_REGION || 'us-east-1'
+                });
+                if (result && result.metadata?.subscriptions?.length > 0) {
+                    mcpSubscriptions = result.metadata.subscriptions;
+                    console.log(`   ✅ Found ${mcpSubscriptions.length} Marketplace subscription(s)`);
+                } else {
+                    console.log('   ℹ️  No Marketplace subscriptions found — enter ARN manually');
+                }
+            } catch (err) {
+                console.log(`   ⚠️  marketplace-picker unavailable: ${err.message}`);
+                console.log('   Falling back to manual ARN entry');
+            }
+        }
+        // Marketplace-specific prompts: model package ARN
+        const marketplacePrompts = [
+            {
+                type: mcpSubscriptions.length > 0 ? 'list' : 'input',
+                name: 'modelPackageArn',
+                message: mcpSubscriptions.length > 0
+                    ? 'Select a Marketplace model package:'
+                    : 'Model package ARN (arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>):',
+                ...(mcpSubscriptions.length > 0 ? {
+                    choices: [
+                        ...mcpSubscriptions.map(sub => ({
+                            name: `${sub.modelName} (${sub.vendor}) — ${sub.arn}`,
+                            value: sub.arn,
+                            short: sub.modelName
+                        })),
+                        { type: 'separator', separator: '──────────────' },
+                        { name: 'Enter ARN manually...', value: '__manual__', short: 'manual' }
+                    ]
+                } : {
+                    validate: (input) => {
+                        if (!input || input.trim() === '') {
+                            return 'Model package ARN is required';
+                        }
+                        const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[\w-]+\/\d+$/;
+                        if (!arnPattern.test(input.trim())) {
+                            return 'Invalid ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>';
+                        }
+                        return true;
+                    }
+                })
+            },
+            {
+                type: 'input',
+                name: 'modelPackageArnManual',
+                message: 'Model package ARN (arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>):',
+                when: (answers) => answers.modelPackageArn === '__manual__',
+                validate: (input) => {
+                    if (!input || input.trim() === '') {
+                        return 'Model package ARN is required';
+                    }
+                    const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[\w-]+\/\d+$/;
+                    if (!arnPattern.test(input.trim())) {
+                        return 'Invalid ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>';
+                    }
+                    return true;
+                }
+            }
+        ];
+        const marketplaceAnswers = await this._runPhase(marketplacePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
+        // Handle manual ARN entry fallback
+        if (marketplaceAnswers.modelPackageArn === '__manual__' && marketplaceAnswers.modelPackageArnManual) {
+            marketplaceAnswers.modelPackageArn = marketplaceAnswers.modelPackageArnManual;
+            delete marketplaceAnswers.modelPackageArnManual;
+        }
+        // Infrastructure prompts: region, deployment target, instance type
+        console.log('\n💪 Infrastructure & Deployment');
+        const bootstrapRegion = existingConfig.awsRegion || explicitConfig.awsRegion;
+        const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
+        // Marketplace deployment targets (no HyperPod — vendor controls the container)
+        const marketplaceInfraPrompts = [
+            {
+                type: 'list',
+                name: 'awsRegion',
+                message: 'Target AWS region?',
+                choices: (answers) => {
+                    const bootstrapReg = answers._bootstrapRegion;
+                    const choices = ['us-east-1'];
+                    if (bootstrapReg && bootstrapReg !== 'us-east-1') {
+                        choices.unshift({ name: `${bootstrapReg} (from bootstrap profile)`, value: bootstrapReg });
+                    }
+                    choices.push({ name: 'Custom...', value: 'custom' });
+                    return choices;
+                },
+                default: (answers) => answers._bootstrapRegion || 'us-east-1'
+            },
+            {
+                type: 'input',
+                name: 'customAwsRegion',
+                message: 'Enter AWS region (e.g., us-west-2, eu-west-1):',
+                when: answers => answers.awsRegion === 'custom'
+            },
+            {
+                type: 'list',
+                name: 'deploymentTarget',
+                message: 'Deployment target?',
+                choices: [
+                    { name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
+                    { name: 'SageMaker Async Inference', value: 'async-inference' },
+                    { name: 'SageMaker Batch Transform', value: 'batch-transform' }
+                ],
+                default: 'realtime-inference'
+            },
+            {
+                type: 'list',
+                name: 'instanceType',
+                message: 'Instance type for deployment?',
+                choices: [
+                    { name: 'ml.g5.xlarge (1 GPU, 24GB)', value: 'ml.g5.xlarge' },
+                    { name: 'ml.g5.2xlarge (1 GPU, 24GB)', value: 'ml.g5.2xlarge' },
+                    { name: 'ml.g5.4xlarge (1 GPU, 24GB)', value: 'ml.g5.4xlarge' },
+                    { name: 'ml.g5.12xlarge (4 GPUs, 96GB)', value: 'ml.g5.12xlarge' },
+                    { name: 'ml.p3.2xlarge (1 GPU, 16GB V100)', value: 'ml.p3.2xlarge' },
+                    { name: 'ml.m5.xlarge (CPU, 16GB)', value: 'ml.m5.xlarge' },
+                    { name: 'Custom...', value: 'custom' }
+                ],
+                default: 'ml.g5.xlarge'
+            },
+            {
+                type: 'input',
+                name: 'customInstanceType',
+                message: 'Enter instance type (e.g., ml.g5.xlarge):',
+                validate: (input) => {
+                    if (!input || input.trim() === '') {
+                        return 'Instance type is required';
+                    }
+                    if (!input.startsWith('ml.')) {
+                        return 'Instance type must start with "ml." (e.g., ml.g5.xlarge)';
+                    }
+                    return true;
+                },
+                when: answers => answers.instanceType === 'custom'
+            }
+        ];
+        const infraAnswers = await this._runPhase(marketplaceInfraPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
+        // Async-specific prompts (only when deploymentTarget === 'async-inference')
+        let asyncAnswers = {};
+        if (infraAnswers.deploymentTarget === 'async-inference') {
+            asyncAnswers = await this._runPhase(infraAsyncPrompts, { ...infraAnswers }, explicitConfig, existingConfig);
+        }
+        // Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
+        let batchTransformAnswers = {};
+        if (infraAnswers.deploymentTarget === 'batch-transform') {
+            batchTransformAnswers = await this._runPhase(
+                infraBatchTransformPrompts,
+                { ...infraAnswers },
+                explicitConfig,
+                existingConfig
+            );
+        }
+        // Role ARN prompt (always needed for marketplace deploy)
+        const rolePrompts = [
+            {
+                type: 'input',
+                name: 'awsRoleArn',
+                message: 'AWS IAM Role ARN for SageMaker execution (optional)?',
+                validate: (input) => {
+                    if (!input || input.trim() === '') {
+                        return true;
+                    }
+                    const arnPattern = /^arn:aws:iam::\d{12}:role\/[\w+=,.@-]+$/;
+                    if (!arnPattern.test(input)) {
+                        return 'Invalid ARN format. Expected: arn:aws:iam::123456789012:role/RoleName';
+                    }
+                    return true;
+                }
+            }
+        ];
+        const roleAnswers = await this._runPhase(rolePrompts, { ...infraAnswers }, explicitConfig, existingConfig);
+        // Project name + destination
+        console.log('\n📋 Project Configuration');
+        const allTechnicalAnswers = {
+            ...frameworkAnswers,
+            ...marketplaceAnswers,
+            ...infraAnswers,
+            ...asyncAnswers,
+            ...batchTransformAnswers,
+            ...roleAnswers
+        };
+        const projectAnswers = await this._runPhase(projectPrompts, allTechnicalAnswers, explicitConfig, existingConfig);
+        const destinationAnswers = await this._runPhase(destinationPrompts,
+            { ...allTechnicalAnswers, ...projectAnswers }, explicitConfig, existingConfig);
+        // Combine all marketplace answers
+        const combinedAnswers = {
+            ...frameworkAnswers,
+            ...marketplaceAnswers,
+            ...infraAnswers,
+            ...asyncAnswers,
+            ...batchTransformAnswers,
+            ...roleAnswers,
+            ...projectAnswers,
+            ...destinationAnswers,
+            buildTimestamp
+        };
+        // Handle custom instance type
+        if (combinedAnswers.customInstanceType) {
+            combinedAnswers.instanceType = combinedAnswers.customInstanceType;
+            delete combinedAnswers.customInstanceType;
+        }
+        // Handle custom AWS region
+        if (combinedAnswers.customAwsRegion) {
+            combinedAnswers.awsRegion = combinedAnswers.customAwsRegion;
+            delete combinedAnswers.customAwsRegion;
+        }
+        // Map awsRoleArn to roleArn for templates
+        if (combinedAnswers.awsRoleArn) {
+            combinedAnswers.roleArn = combinedAnswers.awsRoleArn;
+            delete combinedAnswers.awsRoleArn;
+        }
+        // Ensure CLI-provided values are in combinedAnswers
+        if (explicitConfig.modelPackageArn && !combinedAnswers.modelPackageArn) {
+            combinedAnswers.modelPackageArn = explicitConfig.modelPackageArn;
+        }
+        // Handle marketplace:// prefix from --model-name CLI option
+        const modelName = explicitConfig.modelName || combinedAnswers.modelName;
+        if (modelName && modelName.startsWith('marketplace://')) {
+            const arn = modelName.replace(/^marketplace:\/\//, '');
+            combinedAnswers.modelPackageArn = arn;
+            delete combinedAnswers.modelName;
+        }
+        return combinedAnswers;
+    }
     /**
      * Checks if a parameter is promptable according to the parameter matrix
      * @param {string} parameterName - Name of the parameter
@@ -1746,9 +2016,7 @@ export default class PromptRunner {
             const registryConfigManager = this.registryConfigManager;
             if (registryConfigManager) {
                 // Only try HuggingFace API for bare model IDs (not prefixed URIs)
-                const isNonHfUri = modelId.startsWith('jumpstart://') ||
-                        modelId.startsWith('jumpstart-hub://') ||
-                        modelId.startsWith('s3://') ||
+                const isNonHfUri = modelId.startsWith('s3://') ||
                         modelId.startsWith('registry://');
                 if (!isNonHfUri) {
@@ -1773,7 +2041,7 @@ export default class PromptRunner {
                         console.log('   ⚠️  HuggingFace API unavailable');
                     }
                 } else {
-                    // Non-HF URI (jumpstart://, s3://, etc.) — skip HF lookup silently
+                    // Non-HF URI (s3://, registry://, etc.) — skip HF lookup silently
                     // The summary at the end of this function will report "No additional model information"
                 }

package/src/lib/prompts.js CHANGED Viewed

@@ -232,6 +232,12 @@ const deploymentConfigPrompts = [
                 name: 'Diffusors with vLLM Omni',
                 value: 'diffusors-vllm-omni',
                 short: 'diffusors-vllm-omni'
+            },
+            { type: 'separator', separator: '── AWS Marketplace ──' },
+            {
+                name: 'Marketplace Model Package',
+                value: 'marketplace',
+                short: 'marketplace'
             }
         ]
     }
@@ -469,9 +475,9 @@ const modelFormatPrompts = [
             if (!input || input.trim() === '') {
                 return 'Model name is required';
             }
-            // Basic validation - must contain a slash (org/model, hub/model, s3://path, etc.)
+            // Basic validation - must contain a slash (org/model, s3://path, etc.)
             if (!input.includes('/')) {
-                return 'Please use the full model path (e.g., microsoft/DialoGPT-medium, jumpstart-hub://my-hub/my-model)';
+                return 'Please use the full model path (e.g., microsoft/DialoGPT-medium, s3://bucket/model, registry://my-package)';
             }
             return true;
         },
@@ -583,7 +589,7 @@ const hfTokenPrompts = [
             }
             // Skip HF token prompt for non-HuggingFace model sources
-            // (S3, JumpStart, Private Hub, Registry models don't need HF auth)
+            // (S3, Registry models don't need HF auth)
             const modelSource = answers.modelSource;
             if (modelSource && modelSource !== 'huggingface') {
                 return false;

package/src/lib/template-manager.js CHANGED Viewed

@@ -50,7 +50,7 @@ export default class TemplateManager {
      */
     validate() {
         const supportedOptions = {
-            // 15 canonical deployment-config values (2 http, 5 transformers, 7 triton, 1 diffusors)
+            // 16 canonical deployment-config values (2 http, 5 transformers, 7 triton, 1 diffusors, 1 marketplace)
             deploymentConfigs: [
                 // HTTP architecture (2)
                 'http-flask', 'http-fastapi',
@@ -61,7 +61,9 @@ export default class TemplateManager {
                 'triton-fil', 'triton-onnxruntime', 'triton-tensorflow',
                 'triton-pytorch', 'triton-vllm', 'triton-tensorrtllm', 'triton-python',
                 // Diffusors architecture (1)
-                'diffusors-vllm-omni'
+                'diffusors-vllm-omni',
+                // Marketplace architecture (1)
+                'marketplace'
             ],
             buildTargets: ['codebuild'],
             deploymentTargets: ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
@@ -82,7 +84,7 @@ export default class TemplateManager {
             this._validateGpuRequirement();
         } else {
             // Fallback: validate architecture and backend separately (new canonical format)
-            const architectures = ['http', 'transformers', 'triton', 'diffusors'];
+            const architectures = ['http', 'transformers', 'triton', 'diffusors', 'marketplace'];
             const backends = [
                 // http backends
                 'flask', 'fastapi',
@@ -95,7 +97,11 @@ export default class TemplateManager {
             ];
             this._validateChoice('architecture', architectures);
-            this._validateChoice('backend', backends);
+            // Marketplace has no backend — skip backend validation
+            if (this.answers.architecture !== 'marketplace') {
+                this._validateChoice('backend', backends);
+            }
             // Validate tensorrt-llm is only used with transformers architecture
             if (this.answers.backend === 'tensorrt-llm' && this.answers.architecture !== 'transformers') {

package/src/lib/tune-catalog-validator.js CHANGED Viewed

@@ -13,7 +13,7 @@
 /**
  * Look up a model entry in the catalog by model ID.
- * @param {string} modelId - The JumpStart model ID to look up
+ * @param {string} modelId - The model ID to look up
  * @param {Object} catalog - The tune catalog object with a `models` map
  * @returns {Object|null} The catalog entry for the model, or null if not found
  */
@@ -29,7 +29,7 @@ export function lookupModel(modelId, catalog) {
 /**
  * Check whether a model ID is present in the Supported Model Catalog.
- * @param {string} modelId - The JumpStart model ID to check
+ * @param {string} modelId - The model ID to check
  * @param {Object} catalog - The tune catalog object with a `models` map
  * @returns {boolean} True if the model is in the catalog
  */
@@ -41,7 +41,7 @@ export function isTuneSupported(modelId, catalog) {
  * Validate that a model ID exists in the catalog.
  * Returns a descriptive error when the model is not supported, including
  * the model name, supported families, and a reference to `do/train`.
- * @param {string} modelId - The JumpStart model ID to validate
+ * @param {string} modelId - The model ID to validate
  * @param {Object} catalog - The tune catalog object with a `models` map
  * @returns {{ valid: boolean, error?: string }}
  */
@@ -65,7 +65,7 @@ export function validateModel(modelId, catalog) {
  * Validate that a technique is supported for the given model.
  * Returns a descriptive error listing the supported techniques when
  * the requested technique is not available.
- * @param {string} modelId - The JumpStart model ID
+ * @param {string} modelId - The model ID
  * @param {string} technique - The technique to validate (e.g., 'sft', 'dpo')
  * @param {Object} catalog - The tune catalog object with a `models` map
  * @returns {{ valid: boolean, error?: string }}
@@ -92,7 +92,7 @@ export function validateTechnique(modelId, technique, catalog) {
  * Validate that a training type is supported for the given model and technique.
  * Returns a descriptive error listing the supported training types when
  * the requested type is not available.
- * @param {string} modelId - The JumpStart model ID
+ * @param {string} modelId - The model ID
  * @param {string} technique - The technique (e.g., 'sft', 'dpo')
  * @param {string} trainingType - The training type to validate (e.g., 'lora', 'full-rank')
  * @param {Object} catalog - The tune catalog object with a `models` map

package/templates/Dockerfile CHANGED Viewed

@@ -290,6 +290,7 @@ RUN chmod +x /usr/bin/serve_trtllm
 # Copy startup script
 COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
+COPY code/cw_log_forwarder.py /usr/bin/cw_log_forwarder.py
 COPY code/start_server.sh /usr/bin/start_server.sh
 RUN chmod +x /usr/bin/start_server.sh /usr/bin/cuda_compat.sh
@@ -307,6 +308,7 @@ COPY code/serving.properties /opt/ml/model/serving.properties
 # The container will automatically start DJL Serving with the configuration
 <% } else { %>
 COPY code/cuda_compat.sh /usr/bin/cuda_compat.sh
+COPY code/cw_log_forwarder.py /usr/bin/cw_log_forwarder.py
 COPY code/serve /usr/bin/serve
 RUN chmod 777 /usr/bin/serve /usr/bin/cuda_compat.sh

package/templates/code/cw_log_forwarder.py ADDED Viewed

@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+"""CloudWatch log forwarder — workaround for IC platform log routing gap.
+Pipes stdin to a CW log stream while passing through to stderr.
+Usage: exec > >(python3 /usr/bin/cw_log_forwarder.py) 2>&1
+"""
+import sys, os, time, threading
+import boto3
+from botocore.config import Config
+LOG_GROUP = os.environ.get("CW_LOG_GROUP",
+    f"/aws/sagemaker/InferenceComponents/{os.environ.get('INFERENCE_COMPONENT_NAME', os.environ.get('HOSTNAME', 'unknown'))}")
+LOG_STREAM = f"AllTraffic/{os.environ.get('HOSTNAME', 'container')}"
+REGION = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-west-2"))
+def main():
+    client = boto3.client("logs", region_name=REGION, config=Config(retries={"max_attempts": 2}))
+    try:
+        client.create_log_group(logGroupName=LOG_GROUP)
+    except Exception:
+        pass
+    try:
+        client.create_log_stream(logGroupName=LOG_GROUP, logStreamName=LOG_STREAM)
+    except Exception as e:
+        # Can't create stream — just passthrough
+        for line in sys.stdin:
+            sys.stderr.write(line)
+        return
+    buf, lock, seq = [], threading.Lock(), [None]
+    def flush():
+        with lock:
+            if not buf:
+                return
+            batch = buf[:50]
+            del buf[:50]
+        events = [{"timestamp": int(t * 1000), "message": m} for t, m in batch]
+        kw = {"logGroupName": LOG_GROUP, "logStreamName": LOG_STREAM, "logEvents": events}
+        if seq[0]:
+            kw["sequenceToken"] = seq[0]
+        try:
+            r = client.put_log_events(**kw)
+            seq[0] = r.get("nextSequenceToken")
+        except Exception:
+            pass
+    def loop():
+        while True:
+            time.sleep(2)
+            flush()
+    threading.Thread(target=loop, daemon=True).start()
+    try:
+        for line in sys.stdin:
+            sys.stderr.write(line)
+            with lock:
+                buf.append((time.time(), line.rstrip("\n")))
+    except (KeyboardInterrupt, BrokenPipeError):
+        pass
+    finally:
+        flush()
+if __name__ == "__main__":
+    main()

package/templates/code/serve CHANGED Viewed

@@ -2,6 +2,11 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+# CloudWatch log forwarder — workaround for IC platform log routing gap
+exec > >(python3 /usr/bin/cw_log_forwarder.py) 2>&1
+echo "$(date -u '+%Y-%m-%dT%H:%M:%SZ') [serve] Container started — PID $$"
 # CUDA compatibility setup (required for newer SageMaker inference AMIs)
 source /usr/bin/cuda_compat.sh 2>/dev/null || true
@@ -108,7 +113,7 @@ resolve_model() {
             echo "${!_MODEL_VAR}"
             return
             ;;
-        s3|jumpstart|jumpstart-hub|registry)
+        s3|registry)
             # Check for pre-mounted artifacts first
             if [ -d "$LOCAL_MODEL_PATH" ] && [ "$(ls -A $LOCAL_MODEL_PATH 2>/dev/null)" ]; then
                 echo "Using pre-mounted model artifacts at $LOCAL_MODEL_PATH" >&2
@@ -240,7 +245,7 @@ ARG_PREFIX="--"
 # Define environment variables to exclude (internal variables set by base images)
 <% if (modelServer === 'vllm') { %>
-EXCLUDE_VARS=("VLLM_USAGE_SOURCE")
+EXCLUDE_VARS=("VLLM_USAGE_SOURCE" "VLLM_ENABLE_CUDA_COMPATIBILITY")
 <% } else if (modelServer === 'sglang') { %>
 EXCLUDE_VARS=()
 <% } else if (modelServer === 'tensorrt-llm') { %>
@@ -270,8 +275,14 @@ for var in "${env_vars[@]}"; do
     # Remove prefix, convert to lowercase, and replace underscores with dashes
     arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')
+    # Boolean handling: true = flag only, false = skip entirely
+    if [ "$value" = "false" ]; then
+        continue
+    fi
     SERVER_ARGS+=("${ARG_PREFIX}${arg_name}")
-    if [ -n "$value" ]; then
+    if [ -n "$value" ] && [ "$value" != "true" ]; then
         SERVER_ARGS+=("$value")
     fi
 done

package/templates/code/serving.properties CHANGED Viewed

@@ -15,7 +15,7 @@ option.model_id=<%= modelName %>
 option.model_id=<%= artifactUri %>
 <% } else { %>
 # Model will be loaded from /opt/ml/model at runtime
-# (JumpStart model without artifact URI — requires SageMaker ModelDataUrl)
+# (requires SageMaker ModelDataUrl or MODEL_ARTIFACT_URI)
 # option.model_id=/opt/ml/model
 <% } %>
@@ -71,7 +71,7 @@ option.model_id=<%= modelName %>
 option.model_id=<%= artifactUri %>
 <% } else { %>
 # Model will be loaded from /opt/ml/model at runtime
-# (JumpStart model without artifact URI — requires SageMaker ModelDataUrl)
+# (requires SageMaker ModelDataUrl or MODEL_ARTIFACT_URI)
 # option.model_id=/opt/ml/model
 <% } %>