npm - @aws/ml-container-creator - Versions diffs - 0.3.0 → 0.4.0 - Mend

@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/config/bootstrap-stack.json +86 -7
package/config/defaults.json +1 -1
package/package.json +3 -1
package/servers/instance-sizer/index.js +36 -2
package/servers/instance-sizer/lib/instance-ranker.js +114 -10
package/servers/instance-sizer/lib/quota-resolver.js +368 -0
package/servers/instance-sizer/package.json +2 -0
package/servers/lib/catalogs/instances.json +527 -12
package/servers/lib/catalogs/model-servers.json +15 -15
package/servers/lib/catalogs/model-sizes.json +27 -0
package/servers/lib/catalogs/models.json +71 -0
package/servers/lib/schemas/image-catalog.schema.json +9 -1
package/src/app.js +77 -2
package/src/lib/bootstrap-command-handler.js +96 -3
package/src/lib/cli-handler.js +2 -2
package/src/lib/config-manager.js +78 -1
package/src/lib/prompt-runner.js +96 -9
package/src/lib/prompts.js +66 -4
package/src/lib/schema-sync.js +31 -0
package/src/lib/template-manager.js +49 -1
package/src/lib/validate-runner.js +125 -2
package/templates/Dockerfile +10 -2
package/templates/code/cuda_compat.sh +22 -0
package/templates/code/serve +3 -0
package/templates/code/start_server.sh +3 -0
package/templates/diffusors/Dockerfile +2 -1
package/templates/diffusors/serve +3 -0
package/templates/do/README.md +33 -0
package/templates/do/benchmark +646 -0
package/templates/do/clean +86 -0
package/templates/do/config +26 -3
package/templates/do/deploy +6 -1
package/templates/do/register +8 -1
package/templates/triton/Dockerfile +5 -0

package/servers/lib/catalogs/model-sizes.json CHANGED Viewed

@@ -46,6 +46,33 @@
             "minVramGb": 184,
             "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
         },
+        "meta-llama/Llama-3.1-8B*": {
+            "parameterCount": 8030261248,
+            "defaultDtype": "bfloat16",
+            "architecture": "LlamaForCausalLM",
+            "maxPositionEmbeddings": 131072,
+            "recommendedQuantizations": ["awq", "gptq"],
+            "minVramGb": 20,
+            "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
+        },
+        "meta-llama/Llama-3.2-1B*": {
+            "parameterCount": 1235814400,
+            "defaultDtype": "bfloat16",
+            "architecture": "LlamaForCausalLM",
+            "maxPositionEmbeddings": 131072,
+            "recommendedQuantizations": ["awq", "gptq"],
+            "minVramGb": 5,
+            "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
+        },
+        "meta-llama/Llama-3.2-3B*": {
+            "parameterCount": 3212749824,
+            "defaultDtype": "bfloat16",
+            "architecture": "LlamaForCausalLM",
+            "maxPositionEmbeddings": 131072,
+            "recommendedQuantizations": ["awq", "gptq"],
+            "minVramGb": 9,
+            "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
+        },
         "mistralai/Mistral-7B*": {
             "parameterCount": 7241732096,
             "defaultDtype": "bfloat16",

package/servers/lib/catalogs/models.json CHANGED Viewed

@@ -532,6 +532,77 @@
             "text-generation"
         ]
     },
+    "meta-llama/Llama-3.1-8B*": {
+        "parameterCount": 8030261248,
+        "defaultDtype": "bfloat16",
+        "architecture": "LlamaForCausalLM",
+        "maxPositionEmbeddings": 131072,
+        "recommendedQuantizations": [
+            "awq",
+            "gptq"
+        ],
+        "modelType": "transformer",
+        "tasks": [
+            "text-generation"
+        ]
+    },
+    "meta-llama/Llama-3.1-70B*": {
+        "parameterCount": 70553706496,
+        "defaultDtype": "bfloat16",
+        "architecture": "LlamaForCausalLM",
+        "maxPositionEmbeddings": 131072,
+        "recommendedQuantizations": [
+            "awq",
+            "gptq"
+        ],
+        "modelType": "transformer",
+        "tasks": [
+            "text-generation"
+        ]
+    },
+    "meta-llama/Llama-3.1-405B*": {
+        "parameterCount": 405000000000,
+        "defaultDtype": "bfloat16",
+        "architecture": "LlamaForCausalLM",
+        "maxPositionEmbeddings": 131072,
+        "recommendedQuantizations": [
+            "awq",
+            "gptq",
+            "fp8"
+        ],
+        "modelType": "transformer",
+        "tasks": [
+            "text-generation"
+        ]
+    },
+    "meta-llama/Llama-3.2-1B*": {
+        "parameterCount": 1235814400,
+        "defaultDtype": "bfloat16",
+        "architecture": "LlamaForCausalLM",
+        "maxPositionEmbeddings": 131072,
+        "recommendedQuantizations": [
+            "awq",
+            "gptq"
+        ],
+        "modelType": "transformer",
+        "tasks": [
+            "text-generation"
+        ]
+    },
+    "meta-llama/Llama-3.2-3B*": {
+        "parameterCount": 3212749824,
+        "defaultDtype": "bfloat16",
+        "architecture": "LlamaForCausalLM",
+        "maxPositionEmbeddings": 131072,
+        "recommendedQuantizations": [
+            "awq",
+            "gptq"
+        ],
+        "modelType": "transformer",
+        "tasks": [
+            "text-generation"
+        ]
+    },
     "Qwen/Qwen-7B*": {
         "parameterCount": 7721324544,
         "defaultDtype": "bfloat16",

package/servers/lib/schemas/image-catalog.schema.json CHANGED Viewed

@@ -62,7 +62,15 @@
                             }
                         },
                         "inferenceAmiVersion": {
-                            "type": "string"
+                            "type": "string",
+                            "enum": [
+                                "al2023-ami-sagemaker-inference-cpu-0",
+                                "al2-ami-sagemaker-inference-gpu-2",
+                                "al2-ami-sagemaker-inference-gpu-2-1",
+                                "al2-ami-sagemaker-inference-neuron-2",
+                                "al2-ami-sagemaker-inference-gpu-3-1",
+                                "al2023-ami-sagemaker-inference-gpu-4-1"
+                            ]
                         }
                     },
                     "additionalProperties": false

package/src/app.js CHANGED Viewed

@@ -322,6 +322,17 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/sample_model/**');
     }
+    // Exclude do/benchmark when benchmarking is not selected
+    if (!answers.includeBenchmark) {
+        ignorePatterns.push('**/do/benchmark');
+    }
+    // Exclude do/test when hosted-model-endpoint is not selected
+    const testTypes = answers.testTypes || [];
+    if (!testTypes.includes('hosted-model-endpoint')) {
+        ignorePatterns.push('**/do/test');
+    }
     // Always exclude triton and diffusors source directories
     ignorePatterns.push('**/triton/**');
     ignorePatterns.push('**/diffusors/**');
@@ -417,6 +428,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
     _copyFile(path.join(LIB_DIR, 'manifest-cli.js'), path.join(doLibDir, 'manifest-cli.js'));
     _copyFile(path.join(LIB_DIR, 'asset-manager.js'), path.join(doLibDir, 'asset-manager.js'));
     _copyFile(path.join(LIB_DIR, 'bootstrap-config.js'), path.join(doLibDir, 'bootstrap-config.js'));
+    // Generate .gitignore with benchmarks/ when benchmarking is enabled
+    if (answers.includeBenchmark) {
+        const gitignorePath = path.join(destDir, '.gitignore');
+        const gitignoreContent = '# Benchmark results (generated by do/benchmark)\nbenchmarks/\n';
+        if (fs.existsSync(gitignorePath)) {
+            const existing = fs.readFileSync(gitignorePath, 'utf8');
+            if (!existing.includes('benchmarks/')) {
+                fs.appendFileSync(gitignorePath, `\n${gitignoreContent}`);
+            }
+        } else {
+            fs.writeFileSync(gitignorePath, gitignoreContent);
+        }
+    }
 }
 /**
@@ -529,7 +554,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
         codebuildProjectName: null,
         modelName: null,
         modelFormat: null,
-        includeSampleModel: false,
+        includeSampleModel: true,
         includeTesting: true,
         testTypes: [],
         buildTimestamp: new Date().toISOString(),
@@ -641,6 +666,55 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
             // Silently continue - defaults are already set
         }
     }
+    // Populate baseImage from the catalog when still falsy (covers --skip-prompts and
+    // cases where MCP/CLI/config did not provide a base image).
+    // Precedence: MCP > CLI > config > catalog default (this block).
+    if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
+        const backendKey = answers.backend || answers.modelServer;
+        if (backendKey) {
+            const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
+            if (frameworkVersions) {
+                let resolvedConfig = null;
+                if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
+                    resolvedConfig = frameworkVersions[answers.frameworkVersion];
+                } else {
+                    // Fall back to latest version
+                    const versions = Object.keys(frameworkVersions).sort((a, b) =>
+                        b.localeCompare(a, undefined, { numeric: true })
+                    );
+                    if (versions.length > 0) {
+                        resolvedConfig = frameworkVersions[versions[0]];
+                    }
+                }
+                if (resolvedConfig?.baseImage) {
+                    answers.baseImage = resolvedConfig.baseImage;
+                }
+            }
+        }
+    }
+    // Populate icGpuCount from instance catalog when not explicitly set.
+    // The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
+    // so it must always have a value for GPU deployments.
+    if (answers.icGpuCount == null && answers.instanceType) {
+        // Use gpuCount from instance-sizer recommendation if available
+        if (answers.gpuCount) {
+            answers.icGpuCount = answers.gpuCount;
+        } else {
+            // Look up from instances catalog
+            try {
+                const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
+                const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
+                const instanceInfo = catalogData?.catalog?.[answers.instanceType];
+                if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
+                    answers.icGpuCount = instanceInfo.gpus;
+                }
+            } catch {
+                // Silently continue — template fallback handles missing value
+            }
+        }
+    }
 }
 /**
@@ -977,7 +1051,8 @@ function _setExecutablePermissions(destDir) {
         'do/submit',
         'do/register',
         'do/ci',
-        'do/manifest'
+        'do/manifest',
+        'do/benchmark'
     ];
     shellScripts.forEach(script => {

package/src/lib/bootstrap-command-handler.js CHANGED Viewed

@@ -116,6 +116,11 @@ export default class BootstrapCommandHandler {
         console.log('\n🚀 Bootstrap — Shared AWS Infrastructure Setup\n');
+        // Verify AWS CLI v2 is installed
+        if (!this._verifyCliV2()) {
+            return;
+        }
         // Determine bootstrap profile name
         let profileName;
         if (nonInteractive) {
@@ -194,6 +199,9 @@ export default class BootstrapCommandHandler {
             if (stackOutputs.BatchS3BucketName) {
                 profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
             }
+            if (stackOutputs.BenchmarkS3BucketName) {
+                profileData.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
+            }
             console.log('  ✅ Bootstrap stack deployed successfully');
         } catch (error) {
@@ -382,6 +390,9 @@ export default class BootstrapCommandHandler {
                 if (outputs.BatchS3BucketName) {
                     console.log(`  ✅ S3 bucket (batch): ${outputs.BatchS3BucketName}`);
                 }
+                if (outputs.BenchmarkS3BucketName) {
+                    console.log(`  ✅ S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
+                }
                 if (outputs.StackVersion) {
                     console.log(`  📋 Stack version: ${outputs.StackVersion}`);
                 }
@@ -452,6 +463,20 @@ export default class BootstrapCommandHandler {
                     console.log(`  ⚠️  S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
                 }
             }
+            if (profile.config.benchmarkS3Bucket) {
+                try {
+                    const benchmarkExists = this._resourceExists(
+                        `s3api head-bucket --bucket ${profile.config.benchmarkS3Bucket}`,
+                        profile.config.awsProfile
+                    );
+                    console.log(benchmarkExists
+                        ? `  ✅ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
+                        : `  ⚠️  S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
+                } catch {
+                    console.log(`  ⚠️  S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
+                }
+            }
         }
         // Display deployed resources from manifest
@@ -1005,6 +1030,7 @@ export default class BootstrapCommandHandler {
             if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
             if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
             if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
+            if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
             profileConfig.stackName = stackName;
             console.log('  ✅ Bootstrap stack updated');
@@ -1278,11 +1304,28 @@ export default class BootstrapCommandHandler {
                         'sagemaker:DescribeEndpointConfig',
                         'sagemaker:DescribeModel',
                         'sagemaker:DescribeInferenceComponent',
+                        'sagemaker:ListInferenceComponents',
                         'sagemaker:InvokeEndpoint',
                         'sagemaker:InvokeEndpointAsync'
                     ],
                     Resource: '*'
                 },
+                {
+                    Sid: 'SageMakerBenchmarking',
+                    Effect: 'Allow',
+                    Action: [
+                        'sagemaker:CreateAIBenchmarkJob',
+                        'sagemaker:DescribeAIBenchmarkJob',
+                        'sagemaker:ListAIBenchmarkJobs',
+                        'sagemaker:StopAIBenchmarkJob',
+                        'sagemaker:DeleteAIBenchmarkJob',
+                        'sagemaker:CreateAIWorkloadConfig',
+                        'sagemaker:DescribeAIWorkloadConfig',
+                        'sagemaker:ListAIWorkloadConfigs',
+                        'sagemaker:DeleteAIWorkloadConfig'
+                    ],
+                    Resource: '*'
+                },
                 {
                     Sid: 'ECRPull',
                     Effect: 'Allow',
@@ -1329,6 +1372,29 @@ export default class BootstrapCommandHandler {
                     Effect: 'Allow',
                     Action: 'sns:Publish',
                     Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
+                },
+                {
+                    Sid: 'SecretsManagerBenchmark',
+                    Effect: 'Allow',
+                    Action: [
+                        'secretsmanager:CreateSecret',
+                        'secretsmanager:PutSecretValue',
+                        'secretsmanager:GetSecretValue',
+                        'secretsmanager:DescribeSecret'
+                    ],
+                    Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
+                },
+                {
+                    Sid: 'QuotaAndAvailability',
+                    Effect: 'Allow',
+                    Action: [
+                        'service-quotas:GetServiceQuota',
+                        'service-quotas:ListServiceQuotas',
+                        'sagemaker:ListTrainingPlans',
+                        'sagemaker:DescribeTrainingPlan',
+                        'sagemaker:ListEndpoints'
+                    ],
+                    Resource: '*'
                 }
             ]
         };
@@ -1478,9 +1544,15 @@ export default class BootstrapCommandHandler {
     /**
      * Optionally create S3 buckets for async/batch deployments.
+     * Always creates the benchmark S3 bucket (unconditional).
      * @returns {Promise<object|null>} Bucket names or null if skipped
      */
     async _setupS3Buckets() {
+        // Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
+        const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
+        const tags = this._buildResourceTags();
+        const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
         const { useS3 } = await this._promptFn([{
             type: 'confirm',
             name: 'useS3',
@@ -1489,17 +1561,16 @@ export default class BootstrapCommandHandler {
         }]);
         if (!useS3) {
-            return null;
+            return { benchmarkS3Bucket };
         }
         const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
         const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
-        const tags = this._buildResourceTags();
         const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
         const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
-        return { asyncS3Bucket, batchS3Bucket };
+        return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
     }
     /**
@@ -1556,6 +1627,28 @@ export default class BootstrapCommandHandler {
     // ── AWS CLI helpers ─────────────────────────────────────────────
+    /**
+     * Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
+     * Extracted as a method so tests can override it.
+     * @returns {boolean}
+     */
+    _verifyCliV2() {
+        try {
+            const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
+            if (!versionOutput.includes('aws-cli/2')) {
+                console.log(`  ❌ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
+                console.log('  Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
+                console.log('  Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
+                return false;
+            }
+            return true;
+        } catch {
+            console.log('  ❌ AWS CLI not found.');
+            console.log('  Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
+            return false;
+        }
+    }
     /**
      * Execute an AWS CLI command and return parsed JSON output.
      * @param {string} command - AWS CLI command (without 'aws' prefix)

package/src/lib/cli-handler.js CHANGED Viewed

@@ -375,7 +375,7 @@ For more information, visit: https://github.com/awslabs/ml-container-creator
                 type: 'confirm',
                 name: 'includeSampleModel',
                 message: 'Include sample model:',
-                default: false,
+                default: true,
                 when: answers => answers.framework !== 'transformers'
             },
             {
@@ -515,7 +515,7 @@ ml-container-creator \\
             'framework': 'sklearn',
             'modelServer': 'flask',
             'modelFormat': 'pkl',
-            'includeSampleModel': false,
+            'includeSampleModel': true,
             'includeTesting': true,
             'testTypes': ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
             'buildTarget': 'codebuild',

package/src/lib/config-manager.js CHANGED Viewed

@@ -979,6 +979,83 @@ export default class ConfigManager {
                 default: 1.0,
                 valueSpace: 'bounded',
                 schemaValidated: true
+            },
+            includeBenchmark: {
+                cliOption: 'include-benchmark',
+                envVar: 'ML_INCLUDE_BENCHMARK',
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: false,
+                valueSpace: 'bounded'
+            },
+            benchmarkConcurrency: {
+                cliOption: 'benchmark-concurrency',
+                envVar: null,
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: 10,
+                valueSpace: 'bounded'
+            },
+            benchmarkInputTokensMean: {
+                cliOption: 'benchmark-input-tokens',
+                envVar: null,
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: 550,
+                valueSpace: 'bounded'
+            },
+            benchmarkOutputTokensMean: {
+                cliOption: 'benchmark-output-tokens',
+                envVar: null,
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: 150,
+                valueSpace: 'bounded'
+            },
+            benchmarkStreaming: {
+                cliOption: 'benchmark-streaming',
+                envVar: null,
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: true,
+                valueSpace: 'bounded'
+            },
+            benchmarkRequestCount: {
+                cliOption: 'benchmark-request-count',
+                envVar: null,
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: null,
+                valueSpace: 'bounded'
+            },
+            benchmarkS3OutputPath: {
+                cliOption: 'benchmark-s3-output-path',
+                envVar: 'ML_BENCHMARK_S3_OUTPUT_PATH',
+                configFile: true,
+                packageJson: false,
+                mcp: false,
+                promptable: true,
+                required: false,
+                default: null,
+                valueSpace: 'bounded'
             }
         };
     }
@@ -1011,7 +1088,7 @@ export default class ConfigManager {
      */
     _parseValue(parameter, value) {
         // Handle boolean parameters
-        if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts') {
+        if (parameter === 'includeSampleModel' || parameter === 'includeTesting' || parameter === 'skipPrompts' || parameter === 'includeBenchmark' || parameter === 'benchmarkStreaming') {
             return value === true || value === 'true';
         }