npm - @aws/ml-container-creator - Versions diffs - 0.2.6 → 0.4.0 - Mend

@aws/ml-container-creator 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/bin/cli.js +38 -2
package/config/bootstrap-stack.json +94 -1
package/config/defaults.json +1 -1
package/infra/ci-harness/package-lock.json +22 -9
package/package.json +3 -1
package/servers/instance-sizer/index.js +45 -8
package/servers/instance-sizer/lib/instance-ranker.js +140 -11
package/servers/instance-sizer/lib/model-resolver.js +10 -6
package/servers/instance-sizer/lib/quota-resolver.js +368 -0
package/servers/instance-sizer/package.json +2 -0
package/servers/lib/catalogs/instances.json +527 -12
package/servers/lib/catalogs/model-servers.json +298 -20
package/servers/lib/catalogs/model-sizes.json +27 -0
package/servers/lib/catalogs/models.json +101 -0
package/servers/lib/schemas/image-catalog.schema.json +15 -1
package/servers/model-picker/index.js +2 -1
package/src/app.js +96 -2
package/src/lib/architecture-sync.js +171 -0
package/src/lib/arn-detection.js +22 -0
package/src/lib/bootstrap-command-handler.js +178 -3
package/src/lib/cli-handler.js +2 -2
package/src/lib/config-manager.js +121 -1
package/src/lib/cross-cutting-checker.js +119 -0
package/src/lib/deployment-entry-schema.js +1 -2
package/src/lib/prompt-runner.js +514 -20
package/src/lib/prompts.js +67 -5
package/src/lib/registry-command-handler.js +236 -0
package/src/lib/schema-sync.js +31 -0
package/src/lib/secret-classification.js +56 -0
package/src/lib/secrets-command-handler.js +550 -0
package/src/lib/template-manager.js +49 -1
package/src/lib/validate-runner.js +174 -2
package/src/lib/validation-report.js +8 -1
package/src/prompt-adapter.js +3 -2
package/templates/Dockerfile +10 -2
package/templates/code/cuda_compat.sh +22 -0
package/templates/code/serve +3 -0
package/templates/code/start_server.sh +3 -0
package/templates/diffusors/Dockerfile +2 -1
package/templates/diffusors/serve +3 -0
package/templates/do/README.md +33 -0
package/templates/do/benchmark +646 -0
package/templates/do/build +22 -0
package/templates/do/clean +86 -0
package/templates/do/config +41 -6
package/templates/do/deploy +66 -6
package/templates/do/logs +18 -3
package/templates/do/register +8 -1
package/templates/do/run +10 -0
package/templates/triton/Dockerfile +5 -0

package/src/lib/prompts.js CHANGED Viewed

@@ -583,7 +583,7 @@ const modulePrompts = [
         type: 'confirm',
         name: 'includeSampleModel',
         message: 'Include sample Abalone classifier?',
-        default: false,
+        default: true,
         when: (answers) => {
             const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
             const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
@@ -622,7 +622,10 @@ const modulePrompts = [
             // Transformers and Triton LLM backends only support hosted endpoint tests
             if (architecture === 'transformers') {
-                return ['hosted-model-endpoint'];
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
+            }
+            if (architecture === 'diffusors') {
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
             }
             if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
                 return ['hosted-model-endpoint'];
@@ -635,7 +638,10 @@ const modulePrompts = [
             const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
             if (architecture === 'transformers') {
-                return ['hosted-model-endpoint'];
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
+            }
+            if (architecture === 'diffusors') {
+                return ['hosted-model-endpoint', 'sagemaker-ai-automated-benchmarking'];
             }
             if (architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm')) {
                 return ['hosted-model-endpoint'];
@@ -700,7 +706,12 @@ const infraInstancePrompts = [
         when: answers => answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks',
         message: (answers) => {
             const framework = answers.framework || answers.deploymentConfig?.split('-')[0];
+            // Skip table when MCP sizer already displayed annotated results
+            if (answers._mcpInstanceChoices && answers._mcpInstanceChoices.length > 0) {
+                return 'Select instance type:';
+            }
             const table = new Table({
                 head: [
                     chalk.cyan('Instance Type'),
@@ -1053,7 +1064,7 @@ function formatImageChoices(entries, isTransformer) {
             ? `${entry.repository.padEnd(30)} ${entry.tag.padEnd(16)} ${entry.architecture.padEnd(7)} ${cuda.padEnd(6)} ${python.padEnd(8)} ${date}`
             : `${entry.repository.padEnd(30)} ${entry.tag.padEnd(16)} ${entry.architecture.padEnd(7)} ${python.padEnd(8)} ${date}`;
-        return { name, value: entry.image };
+        return { name, value: entry.image, _meta: { labels: entry.labels, accelerator: entry.accelerator } };
     });
 }
@@ -1110,6 +1121,56 @@ const baseImagePrompts = [
     }
 ];
+/**
+ * Benchmark prompts for SageMaker AI Benchmarking (NVIDIA AIPerf)
+ * Sub-prompts shown when 'sagemaker-ai-automated-benchmarking' is selected in testTypes.
+ * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5
+ */
+const benchmarkPrompts = [
+    {
+        type: 'number',
+        name: 'benchmarkConcurrency',
+        message: 'Concurrent requests for benchmark:',
+        default: 10,
+        when: (answers) => answers.includeBenchmark === true
+    },
+    {
+        type: 'number',
+        name: 'benchmarkInputTokensMean',
+        message: 'Mean input tokens per request:',
+        default: 550,
+        when: (answers) => answers.includeBenchmark === true
+    },
+    {
+        type: 'number',
+        name: 'benchmarkOutputTokensMean',
+        message: 'Mean output tokens per request:',
+        default: 150,
+        when: (answers) => answers.includeBenchmark === true
+    },
+    {
+        type: 'confirm',
+        name: 'benchmarkStreaming',
+        message: 'Enable streaming for benchmark?',
+        default: true,
+        when: (answers) => answers.includeBenchmark === true
+    },
+    {
+        type: 'input',
+        name: 'benchmarkRequestCount',
+        message: 'Total request count (leave empty for service default):',
+        default: '',
+        when: (answers) => answers.includeBenchmark === true
+    },
+    {
+        type: 'input',
+        name: 'benchmarkS3OutputPath',
+        message: 'Benchmark results S3 path (leave empty for auto-created bucket):',
+        default: '',
+        when: (answers) => answers.includeBenchmark === true
+    }
+];
 export {
     deploymentConfigPrompts,
     frameworkPrompts, // Deprecated: kept for backward compatibility
@@ -1123,6 +1184,7 @@ export {
     hfTokenPrompts,
     ngcApiKeyPrompts,
     modulePrompts,
+    benchmarkPrompts,
     infrastructurePrompts,
     infraRegionAndTargetPrompts,
     infraInstancePrompts,

package/src/lib/registry-command-handler.js CHANGED Viewed

@@ -24,6 +24,8 @@ import { readFileSync } from 'node:fs';
 import { execSync } from 'node:child_process';
 import { fileURLToPath } from 'node:url';
 import DeploymentRegistry, { reconstructReplayFlags } from './deployment-registry.js';
+import { syncArchitectures } from './architecture-sync.js';
+import HuggingFaceClient from './huggingface-client.js';
 const PERSONAL_REGISTRY_PATH = path.join(os.homedir(), '.ml-container-creator', 'registry.json');
 const PROJECT_REGISTRY_PATH = path.join(process.cwd(), '.ml-container-creator', 'registry.json');
@@ -71,6 +73,15 @@ export default class RegistryCommandHandler {
         case 'search':
             this._handleSearch(options);
             break;
+        case 'sync-architectures':
+            await this._handleSyncArchitectures();
+            break;
+        case 'list-architectures':
+            this._handleListArchitectures(args, options);
+            break;
+        case 'check':
+            await this._handleCheck(args);
+            break;
         default:
             console.log(`Unknown registry subcommand: ${subcommand}`);
             this._showRegistryHelp();
@@ -431,6 +442,220 @@ export default class RegistryCommandHandler {
         console.log('');
     }
+    /**
+     * registry sync-architectures
+     *
+     * Fetches model registry source files from server GitHub repositories
+     * and populates supportedModelTypes in the model-servers catalog.
+     */
+    async _handleSyncArchitectures() {
+        const __filename = fileURLToPath(import.meta.url);
+        const __dirname = path.dirname(__filename);
+        const catalogPath = path.resolve(__dirname, '../../servers/lib/catalogs/model-servers.json');
+        console.log('\n📋 Syncing model architecture registry...\n');
+        const summary = await syncArchitectures(catalogPath);
+        console.log('\n── Summary ──────────────────────────────────────');
+        if (summary.servers.length > 0) {
+            console.log('\n   Architectures synced:');
+            for (const { server, version, count } of summary.servers) {
+                console.log(`     ${server} ${version}: ${count} architectures`);
+            }
+        }
+        if (summary.failures.length > 0) {
+            console.log('\n   Failures:');
+            for (const { server, version, reason } of summary.failures) {
+                console.log(`     ${server} ${version}: ${reason}`);
+            }
+        }
+        if (summary.servers.length === 0 && summary.failures.length === 0) {
+            console.log('\n   No server entries found with matching registry sources.');
+        }
+        console.log('');
+    }
+    /**
+     * registry list-architectures [--server <name>] [--verbose]
+     *
+     * Displays a table of server versions and their supported architecture counts.
+     * With --server or --verbose, shows the full list of supported model types.
+     *
+     * @param {object} options - Parsed CLI options
+     */
+    _handleListArchitectures(args, options) {
+        const __filename = fileURLToPath(import.meta.url);
+        const __dirname = path.dirname(__filename);
+        const catalogPath = path.resolve(__dirname, '../../servers/lib/catalogs/model-servers.json');
+        let catalog;
+        try {
+            catalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
+        } catch (err) {
+            console.log(`Error: Could not read model-servers catalog: ${err.message}`);
+            return;
+        }
+        // Parse --server and --verbose from pass-through args (Commander's passThroughOptions
+        // puts options after the subcommand into the args array)
+        let serverFilter = options.server || null;
+        let verbose = options.verbose || false;
+        for (const arg of args) {
+            if (arg.startsWith('--server=')) {
+                serverFilter = arg.split('=')[1];
+            } else if (arg === '--server' && args.indexOf(arg) + 1 < args.length) {
+                serverFilter = args[args.indexOf(arg) + 1];
+            } else if (arg === '--verbose') {
+                verbose = true;
+            }
+        }
+        // Collect rows: { server, version, count, types }
+        const rows = [];
+        for (const [server, entries] of Object.entries(catalog)) {
+            if (serverFilter && server !== serverFilter) continue;
+            for (const entry of entries) {
+                const version = entry.labels?.framework_version || '(unknown)';
+                const types = entry.supportedModelTypes || [];
+                rows.push({ server, version, count: types.length, types });
+            }
+        }
+        if (rows.length === 0) {
+            if (serverFilter) {
+                console.log(`No entries found for server "${serverFilter}".`);
+            } else {
+                console.log('No server entries found in catalog.');
+            }
+            return;
+        }
+        // Display summary table
+        console.log('\nModel Architecture Support:\n');
+        console.log('  Server                Version      Architectures');
+        console.log('  ────────────────────  ───────────  ─────────────');
+        for (const row of rows) {
+            const srv = row.server.padEnd(20);
+            const ver = row.version.padEnd(11);
+            const cnt = row.count === 0 ? '(not synced)' : String(row.count);
+            console.log(`  ${srv}  ${ver}  ${cnt}`);
+        }
+        console.log('');
+        // Show full list when --server or --verbose is set
+        if (serverFilter || verbose) {
+            for (const row of rows) {
+                if (row.types.length === 0) continue;
+                console.log(`  ${row.server} ${row.version} supported model types:`);
+                console.log(`    ${row.types.join(', ')}`);
+                console.log('');
+            }
+        }
+    }
+    /**
+     * registry check <model-id>
+     *
+     * Fetches a model's config.json from HuggingFace, extracts the model_type,
+     * and checks compatibility against all server versions in the catalog.
+     *
+     * @param {string[]} args - Remaining positional args (args[1] = model-id)
+     */
+    async _handleCheck(args) {
+        const modelId = args[1];
+        if (!modelId) {
+            console.log('Usage: ml-container-creator registry check <model-id>');
+            console.log('Example: ml-container-creator registry check meta-llama/Llama-2-7b-chat-hf');
+            return;
+        }
+        const __filename = fileURLToPath(import.meta.url);
+        const __dirname = path.dirname(__filename);
+        const catalogPath = path.resolve(__dirname, '../../servers/lib/catalogs/model-servers.json');
+        // Fetch model's config.json from HuggingFace
+        console.log(`\n🔍 Checking model: ${modelId}\n`);
+        console.log('   Fetching model config from HuggingFace...');
+        const hfClient = new HuggingFaceClient({ timeout: 10000 });
+        const config = await hfClient.fetchModelConfig(modelId);
+        if (!config) {
+            console.log(`\n   ❌ Could not fetch config.json for "${modelId}".`);
+            console.log('      Verify the model ID is correct and accessible on HuggingFace.');
+            return;
+        }
+        const modelType = config.model_type;
+        if (!modelType) {
+            console.log(`\n   ❌ No "model_type" field found in config.json for "${modelId}".`);
+            return;
+        }
+        console.log(`   Model type: ${modelType}`);
+        // Load model-servers catalog
+        let catalog;
+        try {
+            catalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
+        } catch (err) {
+            console.log(`\n   ❌ Could not read model-servers catalog: ${err.message}`);
+            return;
+        }
+        // Check model_type against all server entries
+        const compatible = [];
+        const incompatible = [];
+        let hasAnyData = false;
+        for (const [server, entries] of Object.entries(catalog)) {
+            for (const entry of entries) {
+                const version = entry.labels?.framework_version || '(unknown)';
+                const supported = entry.supportedModelTypes;
+                if (!supported || supported.length === 0) continue;
+                hasAnyData = true;
+                const modelTypeLower = modelType.toLowerCase();
+                if (supported.includes(modelTypeLower) || supported.includes(modelType)) {
+                    compatible.push({ server, version });
+                } else {
+                    incompatible.push({ server, version });
+                }
+            }
+        }
+        // Display results
+        if (!hasAnyData) {
+            console.log('\n   ⚠️  No architecture data available. Run "registry sync-architectures" first.');
+            return;
+        }
+        if (compatible.length > 0) {
+            console.log('\n   ✅ Compatible server versions:');
+            for (const { server, version } of compatible) {
+                console.log(`      • ${server} ${version}`);
+            }
+        }
+        if (incompatible.length > 0) {
+            console.log('\n   ⚠️  Potentially incompatible server versions:');
+            for (const { server, version } of incompatible) {
+                console.log(`      • ${server} ${version}`);
+            }
+        }
+        if (compatible.length === 0) {
+            console.log(`\n   ⚠️  Model architecture "${modelType}" was not found in any server's supported types.`);
+            console.log('      This may indicate the model requires a newer server version,');
+            console.log('      or it may work via trust_remote_code. Check server documentation for details.');
+        }
+        console.log('');
+    }
     /**
      * Show registry usage help.
      */
@@ -449,6 +674,9 @@ SUBCOMMANDS:
   export [id] [--status <status>]     Export entries as JSON
   import <file> [--merge|--replace]   Import entries from JSON
   search [filters]                    Search entries with glob matching
+  sync-architectures                  Sync supported model types from server repos
+  list-architectures                  Show supported architectures per server version
+  check <model-id>                    Check model compatibility with server versions
 FILTER OPTIONS (for list and search):
   --backend <backend>                 Filter by backend (e.g., vllm, flask)
@@ -467,6 +695,10 @@ IMPORT OPTIONS:
   --merge                             Keep both existing and imported on conflict
   --replace                           Overwrite existing with imported on conflict
+LIST-ARCHITECTURES OPTIONS:
+  --server <name>                     Show full model type list for a specific server
+  --verbose                           Show full model type list for all servers
 OTHER OPTIONS:
   --project                           Use project-level registry instead of personal
@@ -481,6 +713,10 @@ EXAMPLES:
   ml-container-creator registry export a1b2c3d4
   ml-container-creator registry import team-deployments.json --merge
   ml-container-creator registry search --model "meta-llama/*" --backend vllm
+  ml-container-creator registry list-architectures
+  ml-container-creator registry list-architectures --server vllm
+  ml-container-creator registry list-architectures --verbose
+  ml-container-creator registry check meta-llama/Llama-2-7b-chat-hf
 `);
     }

package/src/lib/schema-sync.js CHANGED Viewed

@@ -188,6 +188,37 @@ export function loadServiceModel(serviceName, registryPath) {
     return readFileSync(modelPath, 'utf8');
 }
+/**
+ * Check whether the SageMaker service model includes the CreateAIBenchmarkJob operation shape.
+ * Used to determine if benchmark parameter validation can be performed.
+ *
+ * @param {string} [registryPath] - Override registry path
+ * @returns {{ available: boolean, reason?: string }}
+ */
+export function hasBenchmarkShape(registryPath) {
+    const regPath = registryPath || getRegistryPath();
+    const modelContent = loadServiceModel('sagemaker', regPath);
+    if (!modelContent) {
+        return { available: false, reason: 'SageMaker service model not found in registry' };
+    }
+    try {
+        const model = JSON.parse(modelContent);
+        const operations = model.operations || {};
+        const shapes = model.shapes || {};
+        // Check for the CreateAIBenchmarkJob operation or its input shape
+        if (operations.CreateAIBenchmarkJob || shapes.CreateAIBenchmarkJobRequest) {
+            return { available: true };
+        }
+        return { available: false, reason: 'service model does not include AI Benchmark operations' };
+    } catch {
+        return { available: false, reason: 'Failed to parse SageMaker service model' };
+    }
+}
 /**
  * Store a service model in the registry.
  * @param {string} serviceName - Service name (e.g., 'sagemaker')

package/src/lib/secret-classification.js ADDED Viewed

@@ -0,0 +1,56 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Secret Classification Registry
+ *
+ * Single source of truth for all secret type metadata. Each entry defines
+ * the identifier, display name, applicable stages, purpose, CLI flags,
+ * environment variable names, and prompt labels for a secret type.
+ *
+ * Adding a new secret type requires only adding a new entry to this array —
+ * the CLI, prompt flow, and do-script templates derive behavior from this registry.
+ */
+export const SECRET_CLASSIFICATIONS = Object.freeze([
+    {
+        identifier: 'hf-token',
+        displayName: 'HuggingFace Token',
+        stages: ['build-time', 'runtime'],
+        purpose: 'Gated model download from HuggingFace Hub',
+        cliFlag: 'hf-token-arn',
+        cliFlagPlaintext: 'hf-token',
+        envVar: 'HF_TOKEN',
+        envVarArn: 'HF_TOKEN_ARN',
+        promptLabel: 'HuggingFace token'
+    },
+    {
+        identifier: 'ngc-token',
+        displayName: 'NVIDIA NGC Token',
+        stages: ['build-time'],
+        purpose: 'Pulling base images from NVIDIA NGC registry',
+        cliFlag: 'ngc-token-arn',
+        cliFlagPlaintext: 'ngc-token',
+        envVar: 'NGC_API_KEY',
+        envVarArn: 'NGC_API_KEY_ARN',
+        promptLabel: 'NVIDIA NGC API key'
+    }
+]);
+/**
+ * Look up a classification entry by identifier.
+ * @param {string} identifier - e.g. 'hf-token'
+ * @returns {Object|undefined}
+ */
+export function getClassification(identifier) {
+    return SECRET_CLASSIFICATIONS.find(c => c.identifier === identifier);
+}
+/**
+ * Get all classifications applicable to a given stage.
+ * @param {string} stage - 'build-time' or 'runtime'
+ * @returns {Object[]}
+ */
+export function getClassificationsForStage(stage) {
+    return SECRET_CLASSIFICATIONS.filter(c => c.stages.includes(stage));
+}