npm - @aws/ml-container-creator - Versions diffs - 0.2.6 → 0.4.0 - Mend

@aws/ml-container-creator 0.2.6 → 0.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (50)

package/bin/cli.js +38 -2
package/config/bootstrap-stack.json +94 -1
package/config/defaults.json +1 -1
package/infra/ci-harness/package-lock.json +22 -9
package/package.json +3 -1
package/servers/instance-sizer/index.js +45 -8
package/servers/instance-sizer/lib/instance-ranker.js +140 -11
package/servers/instance-sizer/lib/model-resolver.js +10 -6
package/servers/instance-sizer/lib/quota-resolver.js +368 -0
package/servers/instance-sizer/package.json +2 -0
package/servers/lib/catalogs/instances.json +527 -12
package/servers/lib/catalogs/model-servers.json +298 -20
package/servers/lib/catalogs/model-sizes.json +27 -0
package/servers/lib/catalogs/models.json +101 -0
package/servers/lib/schemas/image-catalog.schema.json +15 -1
package/servers/model-picker/index.js +2 -1
package/src/app.js +96 -2
package/src/lib/architecture-sync.js +171 -0
package/src/lib/arn-detection.js +22 -0
package/src/lib/bootstrap-command-handler.js +178 -3
package/src/lib/cli-handler.js +2 -2
package/src/lib/config-manager.js +121 -1
package/src/lib/cross-cutting-checker.js +119 -0
package/src/lib/deployment-entry-schema.js +1 -2
package/src/lib/prompt-runner.js +514 -20
package/src/lib/prompts.js +67 -5
package/src/lib/registry-command-handler.js +236 -0
package/src/lib/schema-sync.js +31 -0
package/src/lib/secret-classification.js +56 -0
package/src/lib/secrets-command-handler.js +550 -0
package/src/lib/template-manager.js +49 -1
package/src/lib/validate-runner.js +174 -2
package/src/lib/validation-report.js +8 -1
package/src/prompt-adapter.js +3 -2
package/templates/Dockerfile +10 -2
package/templates/code/cuda_compat.sh +22 -0
package/templates/code/serve +3 -0
package/templates/code/start_server.sh +3 -0
package/templates/diffusors/Dockerfile +2 -1
package/templates/diffusors/serve +3 -0
package/templates/do/README.md +33 -0
package/templates/do/benchmark +646 -0
package/templates/do/build +22 -0
package/templates/do/clean +86 -0
package/templates/do/config +41 -6
package/templates/do/deploy +66 -6
package/templates/do/logs +18 -3
package/templates/do/register +8 -1
package/templates/do/run +10 -0
package/templates/triton/Dockerfile +5 -0

package/src/app.js CHANGED

@@ -119,6 +119,23 @@ export async function run(projectName, options) {
     let answers;
     if (configManager.shouldSkipPrompts()) {
         console.log('\n🚀 Skipping prompts - using configuration from other sources');
+        // Fail-fast if required parameters are missing
+        const missing = configManager.getMissingRequiredParameters();
+        if (missing.length > 0) {
+            console.error('\n❌ Cannot skip prompts — required parameters are missing:\n');
+            for (const param of missing) {
+                const matrix = configManager._getParameterMatrix()[param];
+                const cliFlag = matrix?.cliOption ? `--${matrix.cliOption}` : '';
+                const envVar = matrix?.envVar || '';
+                const hints = [cliFlag, envVar].filter(Boolean).join(' or ');
+                console.error(`   • ${param}${hints ? ` (${hints})` : ''}`);
+            }
+            console.error('\n   Provide these via CLI flags, environment variables, or a config file.');
+            console.error('   Run "ml-container-creator --help" for available options.\n');
+            process.exit(1);
+        }
         answers = configManager.getFinalConfiguration();
         // Infer modelSource from model name prefix if not set
@@ -305,6 +322,17 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/sample_model/**');
     }
+    // Exclude do/benchmark when benchmarking is not selected
+    if (!answers.includeBenchmark) {
+        ignorePatterns.push('**/do/benchmark');
+    }
+    // Exclude do/test when hosted-model-endpoint is not selected
+    const testTypes = answers.testTypes || [];
+    if (!testTypes.includes('hosted-model-endpoint')) {
+        ignorePatterns.push('**/do/test');
+    }
     // Always exclude triton and diffusors source directories
     ignorePatterns.push('**/triton/**');
     ignorePatterns.push('**/diffusors/**');
@@ -400,6 +428,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
     _copyFile(path.join(LIB_DIR, 'manifest-cli.js'), path.join(doLibDir, 'manifest-cli.js'));
     _copyFile(path.join(LIB_DIR, 'asset-manager.js'), path.join(doLibDir, 'asset-manager.js'));
     _copyFile(path.join(LIB_DIR, 'bootstrap-config.js'), path.join(doLibDir, 'bootstrap-config.js'));
+    // Generate .gitignore with benchmarks/ when benchmarking is enabled
+    if (answers.includeBenchmark) {
+        const gitignorePath = path.join(destDir, '.gitignore');
+        const gitignoreContent = '# Benchmark results (generated by do/benchmark)\nbenchmarks/\n';
+        if (fs.existsSync(gitignorePath)) {
+            const existing = fs.readFileSync(gitignorePath, 'utf8');
+            if (!existing.includes('benchmarks/')) {
+                fs.appendFileSync(gitignorePath, `\n${gitignoreContent}`);
+            }
+        } else {
+            fs.writeFileSync(gitignorePath, gitignoreContent);
+        }
+    }
 }
 /**
@@ -493,7 +535,9 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
         chatTemplate: null,
         chatTemplateSource: null,
         hfToken: null,
+        hfTokenArn: null,
         ngcApiKey: null,
+        ngcTokenArn: null,
         envVars: {},
         inferenceAmiVersion: null,
         accelerator: null,
@@ -510,7 +554,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
         codebuildProjectName: null,
         modelName: null,
         modelFormat: null,
-        includeSampleModel: false,
+        includeSampleModel: true,
         includeTesting: true,
         testTypes: [],
         buildTimestamp: new Date().toISOString(),
@@ -622,6 +666,55 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
             // Silently continue - defaults are already set
         }
     }
+    // Populate baseImage from the catalog when still falsy (covers --skip-prompts and
+    // cases where MCP/CLI/config did not provide a base image).
+    // Precedence: MCP > CLI > config > catalog default (this block).
+    if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
+        const backendKey = answers.backend || answers.modelServer;
+        if (backendKey) {
+            const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
+            if (frameworkVersions) {
+                let resolvedConfig = null;
+                if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
+                    resolvedConfig = frameworkVersions[answers.frameworkVersion];
+                } else {
+                    // Fall back to latest version
+                    const versions = Object.keys(frameworkVersions).sort((a, b) =>
+                        b.localeCompare(a, undefined, { numeric: true })
+                    );
+                    if (versions.length > 0) {
+                        resolvedConfig = frameworkVersions[versions[0]];
+                    }
+                }
+                if (resolvedConfig?.baseImage) {
+                    answers.baseImage = resolvedConfig.baseImage;
+                }
+            }
+        }
+    }
+    // Populate icGpuCount from instance catalog when not explicitly set.
+    // The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
+    // so it must always have a value for GPU deployments.
+    if (answers.icGpuCount == null && answers.instanceType) {
+        // Use gpuCount from instance-sizer recommendation if available
+        if (answers.gpuCount) {
+            answers.icGpuCount = answers.gpuCount;
+        } else {
+            // Look up from instances catalog
+            try {
+                const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
+                const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
+                const instanceInfo = catalogData?.catalog?.[answers.instanceType];
+                if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
+                    answers.icGpuCount = instanceInfo.gpus;
+                }
+            } catch {
+                // Silently continue — template fallback handles missing value
+            }
+        }
+    }
 }
 /**
@@ -958,7 +1051,8 @@ function _setExecutablePermissions(destDir) {
         'do/submit',
         'do/register',
         'do/ci',
-        'do/manifest'
+        'do/manifest',
+        'do/benchmark'
     ];
     shellScripts.forEach(script => {

package/src/lib/architecture-sync.js ADDED

@@ -0,0 +1,171 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Architecture Sync
+ *
+ * Fetches model registry source files from server GitHub repositories
+ * and extracts supported model_type values into the model-servers catalog.
+ */
+import { readFileSync, writeFileSync } from 'node:fs';
+/**
+ * Parse vLLM's model registry Python source to extract model_type keys.
+ *
+ * vLLM's registry maps architecture class names to (module, impl_class) tuples:
+ *   "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
+ *   "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
+ *
+ * The module name (first tuple element) corresponds to the model_type.
+ * Also matches older formats where model_type is used directly as dict key.
+ *
+ * @param {string} source - Python source code content
+ * @returns {string[]} Sorted array of model_type strings
+ */
+export const parseVllmRegistry = (source) => {
+    const modelTypes = new Set();
+    const patterns = [
+        // Tuple value format: ("module_name", "ClassName") — extract module_name
+        /\("([a-z][a-z0-9_]*)"\s*,\s*"[A-Z]/g,
+        // Direct lowercase key format (older registries): "model_type": (
+        /"([a-z][a-z0-9_]*)":\s*\(/g,
+        // Direct lowercase key format: "model_type": ClassName
+        /"([a-z][a-z0-9_]*)":\s*[A-Z]/g,
+        // Direct lowercase key format: "model_type": [
+        /"([a-z][a-z0-9_]*)":\s*\[/g
+    ];
+    for (const pattern of patterns) {
+        let match;
+        while ((match = pattern.exec(source)) !== null) {
+            modelTypes.add(match[1]);
+        }
+    }
+    return [...modelTypes].sort();
+};
+/**
+ * Parse SGLang's model_registry.py to extract model_type keys.
+ *
+ * Matches patterns like:
+ *   "llama": ModelClass,
+ *   "qwen2": (ModulePath, ClassName),
+ *
+ * @param {string} source - Python source code content
+ * @returns {string[]} Sorted array of model_type strings
+ */
+export const parseSglangRegistry = (source) => {
+    const modelTypes = new Set();
+    const patterns = [
+        /"([a-z][a-z0-9_]*)":\s*\(/g,
+        /"([a-z][a-z0-9_]*)":\s*[A-Z]/g,
+        /"([a-z][a-z0-9_]*)":\s*\[/g
+    ];
+    for (const pattern of patterns) {
+        let match;
+        while ((match = pattern.exec(source)) !== null) {
+            modelTypes.add(match[1]);
+        }
+    }
+    return [...modelTypes].sort();
+};
+/**
+ * Parse TensorRT-LLM's models __init__.py to extract model_type keys.
+ *
+ * Matches patterns from MODEL_MAP or similar dict structures:
+ *   "llama": LlamaForCausalLM,
+ *   "gpt2": GPT2LMHeadModel,
+ *
+ * @param {string} source - Python source code content
+ * @returns {string[]} Sorted array of model_type strings
+ */
+export const parseTensorRTRegistry = (source) => {
+    const modelTypes = new Set();
+    const patterns = [
+        /"([a-z][a-z0-9_]*)":\s*[A-Z]/g,
+        /"([a-z][a-z0-9_]*)":\s*\(/g,
+        /'([a-z][a-z0-9_]*)':\s*[A-Z]/g,
+        /'([a-z][a-z0-9_]*)':\s*\(/g
+    ];
+    for (const pattern of patterns) {
+        let match;
+        while ((match = pattern.exec(source)) !== null) {
+            modelTypes.add(match[1]);
+        }
+    }
+    return [...modelTypes].sort();
+};
+/**
+ * Configuration mapping each server to its GitHub repository,
+ * registry file path, tag prefix, and parser function.
+ */
+export const SERVER_REGISTRY_SOURCES = {
+    vllm: {
+        repo: 'vllm-project/vllm',
+        file: 'vllm/model_executor/models/registry.py',
+        tagPrefix: 'v',
+        parser: parseVllmRegistry
+    },
+    sglang: {
+        repo: 'sgl-project/sglang',
+        file: 'python/sglang/srt/models/model_registry.py',
+        tagPrefix: 'v',
+        parser: parseSglangRegistry
+    },
+    'tensorrt-llm': {
+        repo: 'NVIDIA/TensorRT-LLM',
+        file: 'tensorrt_llm/models/__init__.py',
+        tagPrefix: 'v',
+        parser: parseTensorRTRegistry
+    }
+};
+/**
+ * Sync supported model architectures from server GitHub repositories
+ * into the model-servers catalog.
+ *
+ * For each server entry in the catalog that has a matching source config,
+ * fetches the model registry file from GitHub at the version tag and
+ * parses it to extract supported model_type values.
+ *
+ * @param {string} catalogPath - Path to model-servers.json
+ * @returns {object} Summary with counts and failures
+ */
+export const syncArchitectures = async (catalogPath) => {
+    const catalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
+    const summary = { servers: [], failures: [] };
+    for (const [server, entries] of Object.entries(catalog)) {
+        const source = SERVER_REGISTRY_SOURCES[server];
+        if (!source) continue;
+        for (const entry of entries) {
+            const version = entry.labels?.framework_version;
+            if (!version) continue;
+            const tag = `${source.tagPrefix}${version}`;
+            const url = `https://raw.githubusercontent.com/${source.repo}/${tag}/${source.file}`;
+            try {
+                const response = await fetch(url);
+                if (!response.ok) {
+                    summary.failures.push({ server, version, reason: `HTTP ${response.status}` });
+                    console.log(`   ⚠️  ${server} ${version}: fetch failed (HTTP ${response.status})`);
+                    continue;
+                }
+                const content = await response.text();
+                entry.supportedModelTypes = source.parser(content);
+                summary.servers.push({ server, version, count: entry.supportedModelTypes.length });
+                console.log(`   ✓ ${server} ${version}: ${entry.supportedModelTypes.length} architectures`);
+            } catch (err) {
+                summary.failures.push({ server, version, reason: err.message });
+                console.log(`   ⚠️  ${server} ${version}: fetch failed (${err.message})`);
+            }
+        }
+    }
+    writeFileSync(catalogPath, JSON.stringify(catalog, null, 4));
+    return summary;
+};

package/src/lib/arn-detection.js ADDED

@@ -0,0 +1,22 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * ARN Detection Utility
+ *
+ * Provides a pure function for distinguishing AWS Secrets Manager ARNs
+ * from plaintext values. Used by the prompt flow and CLI to determine
+ * whether user input should be treated as a secret reference or a
+ * literal token value.
+ */
+const SECRETS_MANAGER_ARN_PREFIX = 'arn:aws:secretsmanager:';
+/**
+ * Determines if a value is a Secrets Manager ARN.
+ * @param {*} value - The input value to check
+ * @returns {boolean} True if the value is a Secrets Manager ARN
+ */
+export function isSecretsManagerArn(value) {
+    return typeof value === 'string' && value.startsWith(SECRETS_MANAGER_ARN_PREFIX);
+}

package/src/lib/bootstrap-command-handler.js CHANGED

@@ -24,6 +24,8 @@ import { fileURLToPath } from 'node:url';
 import BootstrapConfig from './bootstrap-config.js';
 import AwsProfileParser from './aws-profile-parser.js';
 import AssetManager from './asset-manager.js';
+import McpCommandHandler from './mcp-command-handler.js';
+import RegistryCommandHandler from './registry-command-handler.js';
 import { runPrompts } from '../prompt-adapter.js';
 const __filename = fileURLToPath(import.meta.url);
@@ -114,6 +116,11 @@ export default class BootstrapCommandHandler {
         console.log('\n🚀 Bootstrap — Shared AWS Infrastructure Setup\n');
+        // Verify AWS CLI v2 is installed
+        if (!this._verifyCliV2()) {
+            return;
+        }
         // Determine bootstrap profile name
         let profileName;
         if (nonInteractive) {
@@ -192,6 +199,9 @@ export default class BootstrapCommandHandler {
             if (stackOutputs.BatchS3BucketName) {
                 profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
             }
+            if (stackOutputs.BenchmarkS3BucketName) {
+                profileData.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
+            }
             console.log('  ✅ Bootstrap stack deployed successfully');
         } catch (error) {
@@ -311,6 +321,9 @@ export default class BootstrapCommandHandler {
         // Display summary
         this._displaySummary(profileName, profileData);
+        // Step 6: Post-setup chain (mcp init → sync-architectures → sync-schemas)
+        await this._runPostSetupChain(options);
     }
     /**
@@ -377,6 +390,9 @@ export default class BootstrapCommandHandler {
                 if (outputs.BatchS3BucketName) {
                     console.log(`  ✅ S3 bucket (batch): ${outputs.BatchS3BucketName}`);
                 }
+                if (outputs.BenchmarkS3BucketName) {
+                    console.log(`  ✅ S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
+                }
                 if (outputs.StackVersion) {
                     console.log(`  📋 Stack version: ${outputs.StackVersion}`);
                 }
@@ -447,6 +463,20 @@ export default class BootstrapCommandHandler {
                     console.log(`  ⚠️  S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
                 }
             }
+            if (profile.config.benchmarkS3Bucket) {
+                try {
+                    const benchmarkExists = this._resourceExists(
+                        `s3api head-bucket --bucket ${profile.config.benchmarkS3Bucket}`,
+                        profile.config.awsProfile
+                    );
+                    console.log(benchmarkExists
+                        ? `  ✅ S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
+                        : `  ⚠️  S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
+                } catch {
+                    console.log(`  ⚠️  S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
+                }
+            }
         }
         // Display deployed resources from manifest
@@ -1000,6 +1030,7 @@ export default class BootstrapCommandHandler {
             if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
             if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
             if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
+            if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
             profileConfig.stackName = stackName;
             console.log('  ✅ Bootstrap stack updated');
@@ -1054,6 +1085,74 @@ export default class BootstrapCommandHandler {
         // Save updated profile
         this.config.setProfile(name, profileConfig);
         console.log(`\n✅ Update complete for profile "${name}"`);
+        // Re-run post-setup chain after updating AWS resources
+        await this._runPostSetupChain(options);
+    }
+    /**
+     * Run the post-setup chain: mcp init → registry sync-architectures → sync-schemas.
+     * Each step is independent — failures are collected and reported at the end.
+     *
+     * @param {object} options - Parsed CLI options (checks skipPostSetup)
+     */
+    async _runPostSetupChain(options = {}) {
+        if (options['skip-post-setup']) {
+            console.log('\n⏭️  Skipping post-setup chain (--skip-post-setup)');
+            return;
+        }
+        console.log('\n🔗 Running post-setup configuration...\n');
+        const failures = [];
+        // 1. MCP init — register bundled MCP servers
+        console.log('📡 Registering MCP servers...');
+        try {
+            const generatorAdapter = {
+                destinationPath(...segments) {
+                    return path.resolve(process.cwd(), ...segments);
+                }
+            };
+            const mcpHandler = new McpCommandHandler(generatorAdapter);
+            await mcpHandler.handle(['init'], {});
+        } catch (error) {
+            failures.push({ step: 'mcp init', error: error.message });
+            console.log(`  ⚠️  mcp init failed: ${error.message}`);
+        }
+        // 2. Registry sync-architectures — populate supportedModelTypes
+        console.log('\n📋 Syncing model architecture registry...');
+        try {
+            const registryHandler = new RegistryCommandHandler();
+            await registryHandler.handle(['sync-architectures'], {});
+        } catch (error) {
+            failures.push({ step: 'registry sync-architectures', error: error.message });
+            console.log(`  ⚠️  registry sync-architectures failed: ${error.message}`);
+        }
+        // 3. Schema sync — download AWS service models
+        console.log('\n📐 Syncing service schemas...');
+        try {
+            await this._handleSyncSchemas();
+        } catch (error) {
+            failures.push({ step: 'sync-schemas', error: error.message });
+            console.log(`  ⚠️  sync-schemas failed: ${error.message}`);
+        }
+        // Report results
+        if (failures.length === 0) {
+            console.log('\n✅ Bootstrap complete — all systems operational');
+        } else {
+            console.log(`\n⚠️  Bootstrap complete with ${failures.length} warning${failures.length === 1 ? '' : 's'}:`);
+            for (const { step, error } of failures) {
+                console.log(`  • ${step}: ${error}`);
+            }
+            console.log('\n  These steps can be re-run individually:');
+            console.log('    ml-container-creator mcp init');
+            console.log('    ml-container-creator registry sync-architectures');
+            console.log('    ml-container-creator bootstrap sync-schemas');
+        }
     }
     /**
@@ -1205,11 +1304,28 @@ export default class BootstrapCommandHandler {
                         'sagemaker:DescribeEndpointConfig',
                         'sagemaker:DescribeModel',
                         'sagemaker:DescribeInferenceComponent',
+                        'sagemaker:ListInferenceComponents',
                         'sagemaker:InvokeEndpoint',
                         'sagemaker:InvokeEndpointAsync'
                     ],
                     Resource: '*'
                 },
+                {
+                    Sid: 'SageMakerBenchmarking',
+                    Effect: 'Allow',
+                    Action: [
+                        'sagemaker:CreateAIBenchmarkJob',
+                        'sagemaker:DescribeAIBenchmarkJob',
+                        'sagemaker:ListAIBenchmarkJobs',
+                        'sagemaker:StopAIBenchmarkJob',
+                        'sagemaker:DeleteAIBenchmarkJob',
+                        'sagemaker:CreateAIWorkloadConfig',
+                        'sagemaker:DescribeAIWorkloadConfig',
+                        'sagemaker:ListAIWorkloadConfigs',
+                        'sagemaker:DeleteAIWorkloadConfig'
+                    ],
+                    Resource: '*'
+                },
                 {
                     Sid: 'ECRPull',
                     Effect: 'Allow',
@@ -1242,12 +1358,43 @@ export default class BootstrapCommandHandler {
                     Effect: 'Allow',
                     Action: [
                         's3:GetObject',
+                        's3:PutObject',
+                        's3:AbortMultipartUpload',
                         's3:ListBucket'
                     ],
                     Resource: [
                         'arn:aws:s3:::ml-container-creator-*',
                         'arn:aws:s3:::ml-container-creator-*/*'
                     ]
+                },
+                {
+                    Sid: 'SNSPublish',
+                    Effect: 'Allow',
+                    Action: 'sns:Publish',
+                    Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
+                },
+                {
+                    Sid: 'SecretsManagerBenchmark',
+                    Effect: 'Allow',
+                    Action: [
+                        'secretsmanager:CreateSecret',
+                        'secretsmanager:PutSecretValue',
+                        'secretsmanager:GetSecretValue',
+                        'secretsmanager:DescribeSecret'
+                    ],
+                    Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
+                },
+                {
+                    Sid: 'QuotaAndAvailability',
+                    Effect: 'Allow',
+                    Action: [
+                        'service-quotas:GetServiceQuota',
+                        'service-quotas:ListServiceQuotas',
+                        'sagemaker:ListTrainingPlans',
+                        'sagemaker:DescribeTrainingPlan',
+                        'sagemaker:ListEndpoints'
+                    ],
+                    Resource: '*'
                 }
             ]
         };
@@ -1397,9 +1544,15 @@ export default class BootstrapCommandHandler {
     /**
      * Optionally create S3 buckets for async/batch deployments.
+     * Always creates the benchmark S3 bucket (unconditional).
      * @returns {Promise<object|null>} Bucket names or null if skipped
      */
     async _setupS3Buckets() {
+        // Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
+        const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
+        const tags = this._buildResourceTags();
+        const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
         const { useS3 } = await this._promptFn([{
             type: 'confirm',
             name: 'useS3',
@@ -1408,17 +1561,16 @@ export default class BootstrapCommandHandler {
         }]);
         if (!useS3) {
-            return null;
+            return { benchmarkS3Bucket };
         }
         const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
         const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
-        const tags = this._buildResourceTags();
         const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
         const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
-        return { asyncS3Bucket, batchS3Bucket };
+        return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
     }
     /**
@@ -1475,6 +1627,28 @@ export default class BootstrapCommandHandler {
     // ── AWS CLI helpers ─────────────────────────────────────────────
+    /**
+     * Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
+     * Extracted as a method so tests can override it.
+     * @returns {boolean}
+     */
+    _verifyCliV2() {
+        try {
+            const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
+            if (!versionOutput.includes('aws-cli/2')) {
+                console.log(`  ❌ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
+                console.log('  Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
+                console.log('  Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
+                return false;
+            }
+            return true;
+        } catch {
+            console.log('  ❌ AWS CLI not found.');
+            console.log('  Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
+            return false;
+        }
+    }
     /**
      * Execute an AWS CLI command and return parsed JSON output.
      * @param {string} command - AWS CLI command (without 'aws' prefix)
@@ -1649,6 +1823,7 @@ SETUP OPTIONS:
   --skip-s3                           Skip S3 bucket creation
   --ci                                Provision CI testing infrastructure
   --skip-ci                           Skip CI infrastructure provisioning
+  --skip-post-setup                   Skip post-setup chain (mcp init, sync-architectures, sync-schemas)
 STATUS OPTIONS:
   --verify                            Check each active resource against AWS APIs for drift detection

package/src/lib/cli-handler.js CHANGED

@@ -375,7 +375,7 @@ For more information, visit: https://github.com/awslabs/ml-container-creator
                 type: 'confirm',
                 name: 'includeSampleModel',
                 message: 'Include sample model:',
-                default: false,
+                default: true,
                 when: answers => answers.framework !== 'transformers'
             },
             {
@@ -515,7 +515,7 @@ ml-container-creator \\
             'framework': 'sklearn',
             'modelServer': 'flask',
             'modelFormat': 'pkl',
-            'includeSampleModel': false,
+            'includeSampleModel': true,
             'includeTesting': true,
             'testTypes': ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
             'buildTarget': 'codebuild',