npm - @aws/ml-container-creator - Versions diffs - 0.9.0 → 0.10.0 - Mend

@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/bin/cli.js +31 -137
package/config/parameter-schema-v2.json +2065 -0
package/package.json +6 -3
package/servers/lib/catalogs/jumpstart-public.json +101 -16
package/servers/lib/catalogs/models.json +182 -26
package/src/app.js +6 -389
package/src/lib/bootstrap-command-handler.js +75 -1078
package/src/lib/bootstrap-profile-manager.js +634 -0
package/src/lib/bootstrap-provisioners.js +421 -0
package/src/lib/config-loader.js +405 -0
package/src/lib/config-manager.js +59 -1668
package/src/lib/config-mcp-client.js +118 -0
package/src/lib/config-validator.js +634 -0
package/src/lib/cuda-resolver.js +140 -0
package/src/lib/e2e-catalog-validator.js +251 -3
package/src/lib/e2e-ci-recorder.js +103 -0
package/src/lib/generated/cli-options.js +471 -0
package/src/lib/generated/parameter-matrix.js +671 -0
package/src/lib/generated/validation-rules.js +202 -0
package/src/lib/marketplace-flow.js +276 -0
package/src/lib/mcp-query-runner.js +768 -0
package/src/lib/parameter-schema-validator.js +62 -18
package/src/lib/prompt-runner.js +41 -1504
package/src/lib/prompts/feature-prompts.js +172 -0
package/src/lib/prompts/index.js +48 -0
package/src/lib/prompts/infrastructure-prompts.js +690 -0
package/src/lib/prompts/model-prompts.js +552 -0
package/src/lib/prompts/project-prompts.js +70 -0
package/src/lib/prompts.js +2 -1446
package/src/lib/registry-command-handler.js +135 -3
package/src/lib/secrets-prompt-runner.js +251 -0
package/src/lib/template-variable-resolver.js +398 -0
package/templates/code/serve +5 -134
package/templates/code/serve.d/lmi.ejs +19 -0
package/templates/code/serve.d/sglang.ejs +47 -0
package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
package/templates/code/serve.d/vllm.ejs +48 -0
package/templates/do/clean +1 -1387
package/templates/do/clean.d/async-inference.ejs +508 -0
package/templates/do/clean.d/batch-transform.ejs +512 -0
package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
package/templates/do/clean.d/managed-inference.ejs +1043 -0
package/templates/do/deploy +1 -1766
package/templates/do/deploy.d/async-inference.ejs +501 -0
package/templates/do/deploy.d/batch-transform.ejs +529 -0
package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
package/templates/do/deploy.d/managed-inference.ejs +726 -0
package/config/parameter-schema.json +0 -88

package/src/lib/registry-command-handler.js CHANGED Viewed

@@ -20,12 +20,14 @@
 import os from 'node:os';
 import path from 'node:path';
+import fs from 'node:fs';
 import { readFileSync } from 'node:fs';
 import { execSync } from 'node:child_process';
 import { fileURLToPath } from 'node:url';
 import DeploymentRegistry, { reconstructReplayFlags } from './deployment-registry.js';
 import { syncArchitectures } from './architecture-sync.js';
 import HuggingFaceClient from './huggingface-client.js';
+import { computeConfigId } from './ci-register-helpers.js';
 const PERSONAL_REGISTRY_PATH = path.join(os.homedir(), '.ml-container-creator', 'registry.json');
 const PROJECT_REGISTRY_PATH = path.join(process.cwd(), '.ml-container-creator', 'registry.json');
@@ -53,7 +55,7 @@ export default class RegistryCommandHandler {
             await this._handleLog(options);
             break;
         case 'list':
-            this._handleList(options);
+            await this._handleList(options);
             break;
         case 'get':
             this._handleGet(args[1]);
@@ -177,10 +179,11 @@ export default class RegistryCommandHandler {
      *
      * Displays entries from both personal and project-level registries.
      * Supports filtering by backend, architecture, model, instance-type, and status.
+     * When the e2e-status MCP server is reachable, enriches output with E2E status.
      *
      * @param {object} options - Parsed CLI options
      */
-    _handleList(options) {
+    async _handleList(options) {
         const filters = this._extractFilters(options);
         const personalRegistry = new DeploymentRegistry(PERSONAL_REGISTRY_PATH);
@@ -197,6 +200,9 @@ export default class RegistryCommandHandler {
             return;
         }
+        // Attempt to fetch E2E status from the MCP server (silently degrades if unavailable)
+        const e2eStatusMap = await this._fetchE2eStatus(allEntries);
         console.log('\nDeployment Registry Entries:\n');
         for (const entry of allEntries) {
             const id = entry.id || '(no id)';
@@ -206,7 +212,16 @@ export default class RegistryCommandHandler {
             const it = entry.infrastructure?.instanceType || '(none)';
             const st = entry.status || '(none)';
             const src = entry._source === 'project' ? ' [project]' : '';
-            console.log(`  ${id}  ${ts}  ${dc}  ${mn}  ${it}  ${st}${src}`);
+            // Append E2E status column only when MCP server provided data
+            let e2eCol = '';
+            if (e2eStatusMap) {
+                const configId = this._deriveConfigIdFromEntry(entry);
+                const e2e = configId ? e2eStatusMap.get(configId) : null;
+                e2eCol = e2e ? `  [E2E: ${e2e.testStatus}]` : '  [E2E: untested]';
+            }
+            console.log(`  ${id}  ${ts}  ${dc}  ${mn}  ${it}  ${st}${src}${e2eCol}`);
         }
         console.log('');
     }
@@ -764,4 +779,121 @@ EXAMPLES:
         const projectRegistry = new DeploymentRegistry(PROJECT_REGISTRY_PATH);
         return projectRegistry.get(id);
     }
+    /**
+     * Attempt to fetch E2E status from the e2e-status MCP server.
+     * Silently returns null if the server is unreachable, disabled, or returns an error.
+     * No error is shown to the user in any failure case.
+     *
+     * @param {Array} entries - Registry entries to fetch status for
+     * @returns {Promise<Map<string, object>|null>} Map of configId → status, or null if unavailable
+     */
+    async _fetchE2eStatus(entries) {
+        try {
+            // Load MCP config to check if e2e-status server is configured
+            const __fn = fileURLToPath(import.meta.url);
+            const generatorRoot = path.resolve(path.dirname(__fn), '..', '..');
+            const mcpConfigPath = path.join(generatorRoot, 'config', 'mcp.json');
+            if (!fs.existsSync(mcpConfigPath)) return null;
+            const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
+            const serverConfig = mcpConfig.mcpServers?.['e2e-status'];
+            if (!serverConfig) return null;
+            // Respect disabled flag if present
+            if (serverConfig.disabled === true) return null;
+            // Derive configIds from entries
+            const configIds = [];
+            for (const entry of entries) {
+                const configId = this._deriveConfigIdFromEntry(entry);
+                if (configId) configIds.push(configId);
+            }
+            if (configIds.length === 0) return null;
+            // Spawn the MCP server and call get_e2e_status
+            const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
+            const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
+            const resolvedArgs = (serverConfig.args || []).map(arg => {
+                if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
+                    return path.resolve(generatorRoot, arg);
+                }
+                return arg;
+            });
+            const transport = new StdioClientTransport({
+                command: serverConfig.command,
+                args: resolvedArgs,
+                env: { ...process.env, ...(serverConfig.env || {}) },
+                stderr: 'pipe'
+            });
+            const client = new Client(
+                { name: 'ml-container-creator', version: '1.0.0' },
+                { capabilities: {} }
+            );
+            // Use a short timeout to avoid blocking the CLI
+            const timeoutMs = 5000;
+            const result = await Promise.race([
+                (async () => {
+                    await client.connect(transport);
+                    const response = await client.callTool({
+                        name: 'get_e2e_status',
+                        arguments: { configIds }
+                    });
+                    await client.close();
+                    return response;
+                })(),
+                new Promise(resolve => setTimeout(() => resolve(null), timeoutMs))
+            ]);
+            if (!result) return null;
+            // Parse the response
+            const textBlock = result.content?.find(b => b.type === 'text');
+            if (!textBlock) return null;
+            const parsed = JSON.parse(textBlock.text);
+            if (!parsed.results || !Array.isArray(parsed.results)) return null;
+            // Build a map of configId → status object
+            const statusMap = new Map();
+            for (const item of parsed.results) {
+                statusMap.set(item.configId, item);
+            }
+            return statusMap;
+        } catch {
+            // Silently degrade — no error shown to user
+            return null;
+        }
+    }
+    /**
+     * Derive a configId from a registry entry using the same hashing algorithm
+     * as do/register --ci. Returns null if the entry lacks sufficient data.
+     *
+     * @param {object} entry - A deployment registry entry
+     * @returns {string|null} 16-char hex configId, or null
+     */
+    _deriveConfigIdFromEntry(entry) {
+        try {
+            const deploymentConfig = entry.deployment?.deploymentConfig || '';
+            const modelName = entry.model?.modelName || 'none';
+            const instanceType = entry.infrastructure?.instanceType || '';
+            const region = entry.infrastructure?.region || 'us-west-2';
+            const deploymentTarget = entry.deployment?.deploymentTarget || 'realtime-inference';
+            // Need at least deploymentConfig and instanceType to produce a meaningful hash
+            if (!deploymentConfig && !instanceType) return null;
+            return computeConfigId(deploymentConfig, modelName, instanceType, region, deploymentTarget);
+        } catch {
+            return null;
+        }
+    }
 }

package/src/lib/secrets-prompt-runner.js ADDED Viewed

@@ -0,0 +1,251 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Secrets Prompt Runner - Handles secret selection and plaintext entry prompts.
+ * Uses delegation pattern: receives parent PromptRunner reference to access shared state.
+ */
+import { execSync } from 'node:child_process';
+import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
+import { isSecretsManagerArn } from './arn-detection.js';
+import BootstrapConfig from './bootstrap-config.js';
+export default class SecretsPromptRunner {
+    constructor(runner) {
+        this.runner = runner;
+    }
+    /**
+     * Run secret prompts using the Secret_Classification registry.
+     * @param {object} previousAnswers - Answers from previous prompt phases
+     * @param {object} explicitConfig - Explicit CLI/config values
+     * @param {object} existingConfig - Existing project configuration
+     * @returns {Promise<object>} Object with token/ARN values keyed by config field names
+     */
+    async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
+        const results = {};
+        for (const classification of SECRET_CLASSIFICATIONS) {
+            if (!this._secretStagesApply(classification, previousAnswers)) continue;
+            const arnConfigKey = this._getArnConfigKey(classification);
+            const plaintextConfigKey = this._getPlaintextConfigKey(classification);
+            if (explicitConfig[arnConfigKey]) {
+                results[arnConfigKey] = explicitConfig[arnConfigKey];
+                continue;
+            }
+            if (explicitConfig[plaintextConfigKey]) {
+                results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
+                continue;
+            }
+            const managedSecrets = await this._listManagedSecrets(classification.identifier);
+            if (managedSecrets.length > 0) {
+                const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
+                Object.assign(results, answer);
+            } else {
+                const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
+                Object.assign(results, answer);
+            }
+        }
+        return results;
+    }
+    _secretStagesApply(classification, answers) {
+        const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
+        const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
+        if (classification.identifier === 'hf-token') {
+            const isTransformers = architecture === 'transformers';
+            const isDiffusors = architecture === 'diffusors';
+            const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
+            if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
+            const modelSource = answers.modelSource;
+            if (modelSource && modelSource !== 'huggingface') return false;
+            return true;
+        }
+        if (classification.identifier === 'ngc-token') {
+            if (architecture === 'triton') return false;
+            if (architecture === 'diffusors') return false;
+            return architecture === 'transformers' && backend === 'tensorrt-llm';
+        }
+        return classification.stages.length > 0;
+    }
+    _getArnConfigKey(classification) {
+        const keyMap = {
+            'hf-token': 'hfTokenArn',
+            'ngc-token': 'ngcTokenArn'
+        };
+        return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
+    }
+    _getPlaintextConfigKey(classification) {
+        const keyMap = {
+            'hf-token': 'hfToken',
+            'ngc-token': 'ngcApiKey'
+        };
+        return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
+    }
+    async _listManagedSecrets(secretType) {
+        // Allow test overrides on the parent runner
+        if (this.runner._listManagedSecrets && this.runner._listManagedSecrets !== this._listManagedSecrets) {
+            return this.runner._listManagedSecrets(secretType);
+        }
+        try {
+            const bootstrapConfig = new BootstrapConfig();
+            const activeProfile = bootstrapConfig.getActiveProfile();
+            if (!activeProfile) return [];
+            const profile = activeProfile.config.awsProfile;
+            const region = activeProfile.config.awsRegion;
+            if (!profile || !region) return [];
+            const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
+            const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
+            const trimmed = output.trim();
+            if (!trimmed) return [];
+            const result = JSON.parse(trimmed);
+            const secrets = result.SecretList || [];
+            return secrets
+                .filter(secret => {
+                    const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
+                    return typeTag && typeTag.Value === secretType;
+                })
+                .map(secret => ({
+                    name: secret.Name,
+                    arn: secret.ARN
+                }));
+        } catch {
+            return [];
+        }
+    }
+    async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
+        const arnConfigKey = this._getArnConfigKey(classification);
+        console.log(`\n🔐 ${classification.displayName}`);
+        console.log(`   ${classification.purpose}`);
+        const choices = [
+            ...managedSecrets.map(secret => ({
+                name: `🔒 ${secret.name} (${secret.arn})`,
+                value: secret.arn,
+                short: secret.name
+            })),
+            { name: '✏️  Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
+            { name: '⏭️  Skip (use environment variable)', value: '__skip__', short: 'Skip' }
+        ];
+        const { secretSelection } = await this.runner._runPrompts([{
+            type: 'list',
+            name: 'secretSelection',
+            message: `Select ${classification.promptLabel}:`,
+            choices
+        }]);
+        if (secretSelection === '__skip__') {
+            return {};
+        }
+        if (secretSelection === '__plaintext__') {
+            return this._promptPlaintextEntry(classification, previousAnswers);
+        }
+        return { [arnConfigKey]: secretSelection };
+    }
+    async _promptPlaintextEntry(classification, _previousAnswers) {
+        const arnConfigKey = this._getArnConfigKey(classification);
+        const plaintextConfigKey = this._getPlaintextConfigKey(classification);
+        const { tokenValue } = await this.runner._runPrompts([{
+            type: 'input',
+            name: 'tokenValue',
+            message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
+            validate: (input) => {
+                if (!input || input.trim() === '') return true;
+                if (input.trim().startsWith('$')) return true;
+                return true;
+            }
+        }]);
+        if (!tokenValue || tokenValue.trim() === '') {
+            return {};
+        }
+        const value = tokenValue.trim();
+        if (isSecretsManagerArn(value)) {
+            return { [arnConfigKey]: value };
+        }
+        return { [plaintextConfigKey]: value };
+    }
+    async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
+        const arnConfigKey = this._getArnConfigKey(classification);
+        const plaintextConfigKey = this._getPlaintextConfigKey(classification);
+        if (this.runner.configManager?.isAutoPrompt()) {
+            return {};
+        }
+        if (classification.identifier === 'hf-token') {
+            console.log('\n🔐 HuggingFace Authentication');
+            console.log('   Many models (e.g. Llama, Mistral) are gated and require a token.');
+            console.log('   💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
+            console.log('   your token in AWS Secrets Manager for zero-knowledge operation.');
+            console.log('   For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
+        } else if (classification.identifier === 'ngc-token') {
+            console.log('\n🔐 NVIDIA NGC Authentication');
+            console.log('   TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
+            console.log('   💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
+            console.log('   your key in AWS Secrets Manager for zero-knowledge operation.');
+            console.log('   For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
+        } else {
+            console.log(`\n🔐 ${classification.displayName}`);
+            console.log(`   ${classification.purpose}\n`);
+        }
+        const { tokenValue } = await this.runner._runPrompts([{
+            type: 'input',
+            name: 'tokenValue',
+            message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
+            validate: (input) => {
+                if (!input || input.trim() === '') return true;
+                if (input.trim().startsWith('$')) return true;
+                if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
+                    console.warn('\n⚠️  Warning: HuggingFace tokens typically start with "hf_"');
+                    console.warn('   If this is intentional, you can ignore this warning.');
+                }
+                return true;
+            }
+        }]);
+        if (!tokenValue || tokenValue.trim() === '') {
+            return {};
+        }
+        const value = tokenValue.trim();
+        if (isSecretsManagerArn(value)) {
+            return { [arnConfigKey]: value };
+        }
+        return { [plaintextConfigKey]: value };
+    }
+}