npm - @aws/ml-container-creator - Versions diffs - 0.2.3 → 0.2.5 - Mend

@aws/ml-container-creator 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +1 -1
package/bin/cli.js +4 -3
package/config/parameter-schema.json +1 -1
package/package.json +1 -1
package/src/app.js +17 -1
package/src/lib/auto-prompt-builder.js +172 -0
package/src/lib/ci-register-helpers.js +1 -1
package/src/lib/cli-handler.js +1 -1
package/src/lib/config-manager.js +177 -3
package/src/lib/parameter-schema-validator.js +10 -10
package/src/lib/prompt-runner.js +51 -7
package/src/lib/prompts.js +7 -7
package/src/lib/template-manager.js +2 -2
package/templates/do/clean +6 -6
package/templates/do/config +6 -6
package/templates/do/deploy +5 -5
package/templates/do/export +5 -5
package/templates/do/logs +4 -4
package/templates/do/register +3 -3
package/templates/do/test +4 -4

package/README.md CHANGED Viewed

@@ -15,7 +15,7 @@ A CLI tool that creates SageMaker-compatible Docker containers for deploying ML
 | Deployment Target | Description |
 |---|---|
-| Managed Inference | SageMaker real-time endpoints |
+| Real-Time Inference | SageMaker real-time endpoints |
 | Async Inference | SageMaker async endpoints with S3 output |
 | Batch Transform | SageMaker batch processing |
 | HyperPod EKS | Kubernetes-based deployment |

package/bin/cli.js CHANGED Viewed

@@ -27,7 +27,8 @@ program
     // --- General ---
     .addOption(new Option('--skip-prompts', 'Skip interactive prompts and use configuration from other sources'))
-    .addOption(new Option('--config <path>', 'Path to configuration file'))
+    .addOption(new Option('--auto-prompt', 'Fill defaults, prompt only for missing required values'))
+    .addOption(new Option('--config <path>', 'Path to JSON configuration file'))
     .addOption(new Option('--project-name <name>', 'Project name'))
     .addOption(new Option('--project-dir <dir>', 'Output directory path'))
     .addOption(new Option('--force', 'Overwrite existing output directory without prompting'))
@@ -41,7 +42,7 @@ program
     .addOption(new Option('--base-image <image>', 'Base container image for Dockerfile'))
     // --- Build & Infrastructure ---
-    .addOption(new Option('--deployment-target <target>', 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)'))
+    .addOption(new Option('--deployment-target <target>', 'Deployment target (realtime-inference, async-inference, batch-transform, hyperpod-eks)'))
     .addOption(new Option('--instance-type <type>', 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'))
     .addOption(new Option('--region <region>', 'AWS region'))
     .addOption(new Option('--role-arn <arn>', 'IAM role ARN for SageMaker execution'))
@@ -154,7 +155,7 @@ program.configureHelp({
         for (const opt of allOptions) {
             const long = opt.long || '';
-            if (['--skip-prompts', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
+            if (['--skip-prompts', '--auto-prompt', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
                 groups.general.push(opt);
             } else if (['--deployment-config', '--framework', '--model-format', '--model-name', '--model-server', '--base-image'].includes(long)) {
                 groups.model.push(opt);

package/config/parameter-schema.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "schemaVersion": "1.0.0",
     "deploymentTargets": {
-        "managed-inference": {
+        "realtime-inference": {
             "endpoint": {
                 "initialInstanceCount": {
                     "type": "integer",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "0.2.3",
+  "version": "0.2.5",
   "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
   "type": "module",
   "main": "src/app.js",

package/src/app.js CHANGED Viewed

@@ -156,6 +156,22 @@ export async function run(projectName, options) {
             console.log('   If your model package lacks an InferenceSpecification, use the S3 path');
             console.log('   directly instead: --model-name="s3://bucket/path/model.tar.gz"\n');
         }
+    } else if (configManager.isAutoPrompt()) {
+        // Auto-prompt mode: run the wizard with all resolved values pre-filled.
+        // The wizard skips prompts for values already in explicitConfig and
+        // uses phase-level gates to skip irrelevant sections entirely.
+        // This gives context-aware prompting (correct MCP queries, filtered choices)
+        // while only asking for what's truly missing.
+        console.log('\n🔄 Auto-prompt mode — prompting only for missing values with full context');
+        const promptRunner = new PromptRunner({
+            configManager,
+            options: kebabOptions,
+            registryConfigManager,
+            baseConfig
+        });
+        const promptAnswers = await promptRunner.run();
+        answers = configManager.getFinalConfiguration(promptAnswers);
     } else {
         const promptRunner = new PromptRunner({
             configManager,
@@ -482,7 +498,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
         testTypes: [],
         buildTimestamp: new Date().toISOString(),
         buildTarget: 'codebuild',
-        deploymentTarget: 'managed-inference',
+        deploymentTarget: 'realtime-inference',
         hyperPodCluster: null,
         hyperPodNamespace: 'default',
         hyperPodReplicas: 1,

package/src/lib/auto-prompt-builder.js ADDED Viewed

@@ -0,0 +1,172 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Auto-Prompt Builder — generates targeted prompts for missing required parameters.
+ *
+ * Used by --auto-prompt mode to ask only for values that cannot be inferred
+ * or defaulted from the provided CLI flags.
+ */
+/**
+ * Builds a minimal set of prompts for the given missing parameters.
+ * Each prompt is self-contained and doesn't depend on multi-phase wizard state.
+ *
+ * Builders are looked up as own properties of PROMPT_BUILDERS only, so a
+ * parameter name that collides with an inherited Object.prototype member
+ * (e.g. "toString", "constructor") gets the generic text prompt instead of
+ * invoking the inherited function.
+ *
+ * @param {string[]} missingParams - Parameter names that need values
+ * @param {object} [currentConfig] - Current configuration (with defaults filled);
+ *   null or undefined is treated as an empty configuration
+ * @returns {Array} Array of prompt objects compatible with runPrompts()
+ */
+export function buildAutoPrompts(missingParams, currentConfig) {
+    // Builders read properties off the config, so never hand them null/undefined.
+    const config = currentConfig ?? {};
+    const prompts = [];
+    for (const param of missingParams) {
+        // Own-property lookup: PROMPT_BUILDERS[param] alone would also resolve
+        // names inherited from Object.prototype.
+        const builder = Object.prototype.hasOwnProperty.call(PROMPT_BUILDERS, param)
+            ? PROMPT_BUILDERS[param]
+            : undefined;
+        if (builder) {
+            // A builder may return a falsy value to signal "no prompt needed".
+            const prompt = builder(config);
+            if (prompt) {
+                prompts.push(prompt);
+            }
+        } else {
+            // Fallback: generic text input for unknown parameters
+            prompts.push({
+                type: 'input',
+                name: param,
+                message: `Enter value for ${param}:`
+            });
+        }
+    }
+    return prompts;
+}
+/**
+ * Map of parameter names to prompt builder functions.
+ * Each builder receives the current config and returns a prompt object.
+ *
+ * Builders defined here: deploymentConfig, instanceType, deploymentTarget,
+ * modelFormat, awsRegion, buildTarget. Only instanceType and modelFormat read
+ * the config; the others return a fixed choice list.
+ *
+ * NOTE(review): several lists offer a '_custom' value ("Custom (enter manually)").
+ * Nothing in this map turns that sentinel into a free-text prompt — presumably
+ * the caller is expected to follow up; confirm before relying on it.
+ */
+const PROMPT_BUILDERS = {
+    // Fixed list of deployment configurations, grouped under separator rows.
+    // NOTE(review): separators are plain { type: 'separator', separator } objects —
+    // confirm the prompt library in use renders this shape as a separator.
+    deploymentConfig: (_config) => ({
+        type: 'list',
+        name: 'deploymentConfig',
+        message: 'Select deployment configuration:',
+        choices: [
+            { type: 'separator', separator: '── Large Language Models ──' },
+            { name: 'Transformers with vLLM', value: 'transformers-vllm' },
+            { name: 'Transformers with SGLang', value: 'transformers-sglang' },
+            { name: 'Transformers with TensorRT-LLM', value: 'transformers-tensorrt-llm' },
+            { name: 'Transformers with LMI', value: 'transformers-lmi' },
+            { name: 'Transformers with DJL', value: 'transformers-djl' },
+            { type: 'separator', separator: '── HTTP Serving ──' },
+            { name: 'HTTP with Flask', value: 'http-flask' },
+            { name: 'HTTP with FastAPI', value: 'http-fastapi' },
+            { type: 'separator', separator: '── NVIDIA Triton ──' },
+            { name: 'Triton FIL (XGBoost, LightGBM)', value: 'triton-fil' },
+            { name: 'Triton ONNX Runtime', value: 'triton-onnxruntime' },
+            { name: 'Triton TensorFlow', value: 'triton-tensorflow' },
+            { name: 'Triton PyTorch', value: 'triton-pytorch' },
+            { name: 'Triton vLLM', value: 'triton-vllm' },
+            { name: 'Triton TensorRT-LLM', value: 'triton-tensorrtllm' },
+            { name: 'Triton Python Backend', value: 'triton-python' },
+            { type: 'separator', separator: '── Diffusion Models ──' },
+            { name: 'Diffusors with vLLM Omni', value: 'diffusors-vllm-omni' }
+        ]
+    }),
+    // Offers the GPU list when config.architecture is transformers, triton or
+    // diffusors; any other value (or none, which defaults to 'http') gets the CPU list.
+    instanceType: (config) => {
+        const architecture = config.architecture || 'http';
+        const isGpu = architecture === 'transformers' || architecture === 'triton' || architecture === 'diffusors';
+        const gpuChoices = [
+            { name: 'ml.g5.xlarge  (1× A10G 24GB — small LLMs)', value: 'ml.g5.xlarge' },
+            { name: 'ml.g5.2xlarge (1× A10G 24GB — medium LLMs)', value: 'ml.g5.2xlarge' },
+            { name: 'ml.g5.4xlarge (1× A10G 24GB — larger models)', value: 'ml.g5.4xlarge' },
+            { name: 'ml.g5.12xlarge (4× A10G 96GB — large LLMs)', value: 'ml.g5.12xlarge' },
+            { name: 'ml.g5.48xlarge (8× A10G 192GB — very large)', value: 'ml.g5.48xlarge' },
+            { name: 'ml.g6.xlarge  (1× L4 24GB)', value: 'ml.g6.xlarge' },
+            { name: 'ml.g6.2xlarge (1× L4 24GB)', value: 'ml.g6.2xlarge' },
+            { name: 'ml.p4d.24xlarge (8× A100 320GB)', value: 'ml.p4d.24xlarge' },
+            { name: 'ml.p5.48xlarge (8× H100 640GB)', value: 'ml.p5.48xlarge' },
+            { name: 'Custom (enter manually)', value: '_custom' }
+        ];
+        const cpuChoices = [
+            { name: 'ml.m5.large   (2 vCPU, 8GB — lightweight)', value: 'ml.m5.large' },
+            { name: 'ml.m5.xlarge  (4 vCPU, 16GB — small models)', value: 'ml.m5.xlarge' },
+            { name: 'ml.m5.2xlarge (8 vCPU, 32GB — medium models)', value: 'ml.m5.2xlarge' },
+            { name: 'ml.m5.4xlarge (16 vCPU, 64GB — large models)', value: 'ml.m5.4xlarge' },
+            { name: 'ml.c5.xlarge  (4 vCPU, 8GB — compute-heavy)', value: 'ml.c5.xlarge' },
+            { name: 'ml.c5.2xlarge (8 vCPU, 16GB — compute-heavy)', value: 'ml.c5.2xlarge' },
+            { name: 'Custom (enter manually)', value: '_custom' }
+        ];
+        return {
+            type: 'list',
+            name: 'instanceType',
+            // The GPU hint is appended to the message only for the GPU list.
+            message: `Select instance type${isGpu ? ' (GPU recommended for this architecture)' : ''}:`,
+            choices: isGpu ? gpuChoices : cpuChoices
+        };
+    },
+    // Fixed list of the four deployment targets.
+    deploymentTarget: (_config) => ({
+        type: 'list',
+        name: 'deploymentTarget',
+        message: 'Select deployment target:',
+        choices: [
+            { name: 'Real-Time Inference', value: 'realtime-inference' },
+            { name: 'Async Inference', value: 'async-inference' },
+            { name: 'Batch Transform', value: 'batch-transform' },
+            { name: 'HyperPod EKS', value: 'hyperpod-eks' }
+        ]
+    }),
+    // Picks the format list for config.engine (defaulting to 'sklearn').
+    modelFormat: (config) => {
+        const engine = config.engine || 'sklearn';
+        const formatMap = {
+            sklearn: [
+                { name: 'pkl (pickle)', value: 'pkl' },
+                { name: 'joblib', value: 'joblib' }
+            ],
+            xgboost: [
+                { name: 'json', value: 'json' },
+                { name: 'model (binary)', value: 'model' },
+                { name: 'ubj (universal binary JSON)', value: 'ubj' }
+            ],
+            tensorflow: [
+                { name: 'keras', value: 'keras' },
+                { name: 'h5', value: 'h5' },
+                { name: 'SavedModel', value: 'SavedModel' }
+            ]
+        };
+        // Engines without an entry fall back to the sklearn formats; note the
+        // message below still names the original engine in that case.
+        const choices = formatMap[engine] || formatMap.sklearn;
+        return {
+            type: 'list',
+            name: 'modelFormat',
+            message: `Select model format for ${engine}:`,
+            choices
+        };
+    },
+    // Fixed short list of regions plus the '_custom' sentinel.
+    awsRegion: (_config) => ({
+        type: 'list',
+        name: 'awsRegion',
+        message: 'Select AWS region:',
+        choices: [
+            { name: 'us-east-1 (N. Virginia)', value: 'us-east-1' },
+            { name: 'us-west-2 (Oregon)', value: 'us-west-2' },
+            { name: 'eu-west-1 (Ireland)', value: 'eu-west-1' },
+            { name: 'ap-northeast-1 (Tokyo)', value: 'ap-northeast-1' },
+            { name: 'ap-southeast-1 (Singapore)', value: 'ap-southeast-1' },
+            { name: 'Custom (enter manually)', value: '_custom' }
+        ]
+    }),
+    // Single-choice list: 'codebuild' is the only build target offered here.
+    buildTarget: (_config) => ({
+        type: 'list',
+        name: 'buildTarget',
+        message: 'Select build target:',
+        choices: [
+            { name: 'CodeBuild (recommended)', value: 'codebuild' }
+        ]
+    })
+};

package/src/lib/ci-register-helpers.js CHANGED Viewed

@@ -25,7 +25,7 @@ import { createHash } from 'node:crypto';
  * @param {string} modelName - e.g. "meta-llama/Llama-2-7b-chat-hf", defaults to "none"
  * @param {string} instanceType - e.g. "ml.g5.xlarge"
  * @param {string} region - e.g. "us-east-1"
- * @param {string} deploymentTarget - e.g. "managed-inference"
+ * @param {string} deploymentTarget - e.g. "realtime-inference"
  * @returns {string} 16-character lowercase hex string
  */
 export function computeConfigId(deploymentConfig, modelName, instanceType, region, deploymentTarget) {

package/src/lib/cli-handler.js CHANGED Viewed

@@ -190,7 +190,7 @@ CLI OPTIONS:
   --instance-type=<type>      SageMaker instance type (e.g., ml.m5.large, ml.g5.xlarge)
   --region=<region>           AWS region
   --role-arn=<arn>            AWS IAM role ARN for SageMaker execution
-  --deployment-target=<target> Deployment target (managed-inference|hyperpod-eks)
+  --deployment-target=<target> Deployment target (realtime-inference|async-inference|batch-transform|hyperpod-eks)
   --hyperpod-cluster=<name> HyperPod EKS cluster name
   --hyperpod-namespace=<ns> Kubernetes namespace for HyperPod (default: default)
   --hyperpod-replicas=<n>   Number of replicas for HyperPod (default: 1)

package/src/lib/config-manager.js CHANGED Viewed

@@ -78,6 +78,7 @@ export default class ConfigManager {
         this.args = args || [];
         this.config = {};
         this.skipPrompts = false;
+        this.autoPrompt = false;
         this.deploymentConfigResolver = new DeploymentConfigResolver();
         this.parameterMatrix = this._getParameterMatrix();
         this.schemaValidator = new ParameterSchemaValidator();
@@ -106,6 +107,9 @@ export default class ConfigManager {
         await this._loadCliArguments();
         await this._loadCliOptions();
+        // Normalize deprecated values to canonical equivalents
+        this._normalizeDeprecatedValues();
         // Query configured MCP servers for unbounded parameter values
         await this._queryMcpServers();
@@ -113,6 +117,18 @@ export default class ConfigManager {
         this.skipPrompts = this.options['skip-prompts'] ||
                           this._hasCompleteConfiguration();
+        // Auto-prompt mode: fill defaults like skip-prompts, but prompt for truly missing values
+        this.autoPrompt = this.options['auto-prompt'] === true;
+        if (this.autoPrompt) {
+            // In auto-prompt mode, we don't skip prompts entirely — we'll selectively prompt
+            this.skipPrompts = false;
+            // Pre-fill defaults for required parameters that can be auto-generated.
+            // Promote these into explicitConfig so the wizard skips them.
+            // This means the wizard only prompts for values that are truly ambiguous.
+            this._fillAutoPromptDefaults();
+        }
         return this.config;
     }
@@ -182,8 +198,8 @@ export default class ConfigManager {
             }
         }
-        // When skipping prompts, provide reasonable defaults for missing required parameters
-        if (this.skipPrompts) {
+        // When skipping prompts or in auto-prompt mode, provide reasonable defaults for missing required parameters
+        if (this.skipPrompts || this.autoPrompt) {
             Object.entries(this.parameterMatrix).forEach(([param, config]) => {
                 if (config.required &&
                     (finalConfig[param] === null || finalConfig[param] === undefined)) {
@@ -635,7 +651,7 @@ export default class ConfigManager {
                 mcp: false,
                 promptable: true,
                 required: true,
-                default: 'managed-inference',
+                default: 'realtime-inference',
                 valueSpace: 'bounded'
             },
             hyperPodCluster: {
@@ -1373,6 +1389,35 @@ export default class ConfigManager {
         this._parseEnvVarOptions('server-env', 'serverEnvVars');
     }
+    /**
+     * Normalizes deprecated parameter values to their canonical equivalents.
+     * Prints a deprecation warning when a deprecated value is encountered.
+     *
+     * Rewrites this.config[param] and, when the same deprecated value is
+     * recorded there, this.explicitConfig[param].
+     *
+     * Aliases are matched as own properties of the alias table only. A value
+     * such as "constructor" or "__proto__" is therefore left untouched for
+     * later validation instead of matching an inherited Object.prototype
+     * member and being overwritten with undefined.
+     * @private
+     */
+    _normalizeDeprecatedValues() {
+        const DEPRECATED_VALUES = {
+            deploymentTarget: {
+                'managed-inference': {
+                    canonical: 'realtime-inference',
+                    message: '--deployment-target=managed-inference is deprecated, use realtime-inference instead'
+                }
+            }
+        };
+        for (const [param, aliases] of Object.entries(DEPRECATED_VALUES)) {
+            const currentValue = this.config[param];
+            // Own-property check: aliases[currentValue] alone would also be truthy
+            // for names inherited from Object.prototype.
+            if (!currentValue || !Object.prototype.hasOwnProperty.call(aliases, currentValue)) {
+                continue;
+            }
+            const { canonical, message } = aliases[currentValue];
+            console.log(`\n⚠️  Deprecation: ${message}`);
+            this.config[param] = canonical;
+            // Also update explicit config if it was set there
+            if (this.explicitConfig && this.explicitConfig[param] === currentValue) {
+                this.explicitConfig[param] = canonical;
+            }
+        }
+    }
     /**
      * Parse --model-env or --server-env CLI options into env var collections.
      * Supports both array (multiple flags) and single string values.
@@ -1828,6 +1873,135 @@ export default class ConfigManager {
         return autoGeneratable.includes(param);
     }
+    /**
+     * Fills auto-prompt defaults for parameters that have sensible defaults
+     * or can be inferred from the current config. Promotes these into
+     * explicitConfig so the wizard skips them.
+     *
+     * Only fills parameters that:
+     * - Have a non-null default in the parameter matrix, OR
+     * - Can be auto-generated (instanceType, modelFormat, etc.)
+     *
+     * Does NOT fill parameters that are truly ambiguous and need user input
+     * (e.g., deploymentConfig when not provided).
+     *
+     * Side effects: creates this.explicitConfig if absent, and writes to both
+     * this.config and this.explicitConfig. When architecture is derived from
+     * deploymentConfig, this.config.architecture/backend/engine are set too.
+     * @private
+     */
+    _fillAutoPromptDefaults() {
+        if (!this.explicitConfig) {
+            this.explicitConfig = {};
+        }
+        // Derive architecture from deploymentConfig if available
+        let architecture = this.config.architecture;
+        if (!architecture && this.config.deploymentConfig) {
+            try {
+                const parts = this.deploymentConfigResolver.decompose(this.config.deploymentConfig);
+                architecture = parts.architecture;
+                this.config.architecture = parts.architecture;
+                this.config.backend = parts.backend;
+                this.config.engine = parts.engine;
+            } catch {
+                // Invalid deploymentConfig — will be caught by validation
+                // (architecture stays undefined, so the 'http' fallback below applies)
+            }
+        }
+        // Each `return` inside this callback only ends processing of the current parameter.
+        Object.entries(this.parameterMatrix).forEach(([param, config]) => {
+            // Skip if already explicitly set
+            if (this.explicitConfig[param] !== undefined && this.explicitConfig[param] !== null) {
+                return;
+            }
+            // For optional parameters: promote an existing config value, or else the
+            // matrix default, into explicitConfig so the wizard skips the prompt.
+            // Optional parameters with neither a value nor a default are left unset here.
+            if (!config.required) {
+                // An existing value in config takes precedence over the matrix default
+                if (this.config[param] !== undefined && this.config[param] !== null) {
+                    this.explicitConfig[param] = this.config[param];
+                } else if (config.default !== null && config.default !== undefined) {
+                    this.config[param] = config.default;
+                    this.explicitConfig[param] = config.default;
+                }
+                return;
+            }
+            // For required parameters: fill auto-generatable values
+            if (this.config[param] === undefined || this.config[param] === null) {
+                if (param === 'instanceType') {
+                    // CPU instance for the http architecture, GPU instance for everything else
+                    const arch = architecture || 'http';
+                    this.config[param] = arch === 'http' ? 'ml.m5.large' : 'ml.g5.xlarge';
+                } else if (param === 'modelFormat') {
+                    if (architecture === 'transformers' || architecture === 'triton' || architecture === 'diffusors') {
+                        return; // Not needed for these architectures
+                    }
+                    // Unknown engines fall back to 'pkl', the sklearn format
+                    const engine = this.config.engine || 'sklearn';
+                    const formatMap = { sklearn: 'pkl', xgboost: 'json', tensorflow: 'keras' };
+                    this.config[param] = formatMap[engine] || 'pkl';
+                } else if (param === 'projectName') {
+                    this.config[param] = this._generateProjectName(architecture);
+                } else {
+                    return; // Can't fill — leave for prompting
+                }
+            }
+            // Promote non-null values to explicitConfig so the wizard skips them
+            if (this.config[param] !== undefined && this.config[param] !== null) {
+                // NOTE(review): `config.default !== null` is also true when the matrix
+                // entry has no `default` key at all (undefined), whereas
+                // getMissingRequiredParameters() tests for both null and undefined —
+                // confirm whether an undefined default is meant to promote here.
+                if (config.default !== null || this._canAutoGenerate(param)) {
+                    this.explicitConfig[param] = this.config[param];
+                }
+            }
+        });
+    }
+    /**
+     * Returns whether auto-prompt mode is active.
+     * Read-only accessor for this.autoPrompt; it performs no computation itself.
+     * @returns {boolean} The current value of this.autoPrompt
+     */
+    isAutoPrompt() {
+        return this.autoPrompt;
+    }
+    /**
+     * Lists the required, promptable parameters that still have no value and
+     * that can neither be auto-generated nor taken from a matrix default.
+     * Auto-prompt mode uses this to decide which specific prompts to show.
+     *
+     * Names are returned in parameter-matrix order.
+     *
+     * @returns {string[]} Array of parameter names that need prompting
+     */
+    getMissingRequiredParameters() {
+        // modelFormat is not required for these architectures
+        const formatFreeArchitectures = ['transformers', 'triton', 'diffusors'];
+        const needsPrompt = ([name, spec]) => {
+            if (!(spec.required && spec.promptable)) {
+                return false;
+            }
+            const current = this.config[name];
+            if (current !== undefined && current !== null) {
+                return false;
+            }
+            if (name === 'modelFormat') {
+                if (formatFreeArchitectures.includes(this.config.architecture)) {
+                    return false;
+                }
+                // Can be inferred from engine
+                if (this.config.engine || this.config.deploymentConfig) {
+                    return false;
+                }
+            }
+            // Anything that can be auto-generated never needs a prompt
+            if (this._canAutoGenerate(name)) {
+                return false;
+            }
+            // Only parameters without a usable default are left for prompting
+            return spec.default === null || spec.default === undefined;
+        };
+        return Object.entries(this.parameterMatrix)
+            .filter(needsPrompt)
+            .map(([name]) => name);
+    }
     /**
      * Generates a project name based on framework
      * @param {string} framework - The ML framework

package/src/lib/parameter-schema-validator.js CHANGED Viewed

@@ -27,15 +27,15 @@ const SUPPORTED_SCHEMA_VERSION = '1.0.0';
  * Format: 'deploymentTarget.category.schemaKey'
  */
 const PARAMETER_NAME_MAP = {
-    endpointInitialInstanceCount: 'managed-inference.endpoint.initialInstanceCount',
-    endpointDataCapturePercent: 'managed-inference.endpoint.dataCapturePercent',
-    endpointVariantName: 'managed-inference.endpoint.variantName',
-    endpointVolumeSize: 'managed-inference.endpoint.volumeSize',
-    icCpuCount: 'managed-inference.inferenceComponent.cpuCount',
-    icMemorySize: 'managed-inference.inferenceComponent.memorySize',
-    icGpuCount: 'managed-inference.inferenceComponent.gpuCount',
-    icCopyCount: 'managed-inference.inferenceComponent.copyCount',
-    icModelWeight: 'managed-inference.inferenceComponent.modelWeight'
+    endpointInitialInstanceCount: 'realtime-inference.endpoint.initialInstanceCount',
+    endpointDataCapturePercent: 'realtime-inference.endpoint.dataCapturePercent',
+    endpointVariantName: 'realtime-inference.endpoint.variantName',
+    endpointVolumeSize: 'realtime-inference.endpoint.volumeSize',
+    icCpuCount: 'realtime-inference.inferenceComponent.cpuCount',
+    icMemorySize: 'realtime-inference.inferenceComponent.memorySize',
+    icGpuCount: 'realtime-inference.inferenceComponent.gpuCount',
+    icCopyCount: 'realtime-inference.inferenceComponent.copyCount',
+    icModelWeight: 'realtime-inference.inferenceComponent.modelWeight'
 };
 export default class ParameterSchemaValidator {
@@ -96,7 +96,7 @@ export default class ParameterSchemaValidator {
     /**
      * Resolve a parameter name to its schema constraint object.
      * @param {string} parameterName - ConfigManager key (e.g., 'endpointVolumeSize')
-     * @param {string} [deploymentTarget] - Deployment target override (e.g., 'managed-inference')
+     * @param {string} [deploymentTarget] - Deployment target override (e.g., 'realtime-inference')
      * @returns {Object|null} Constraint object or null if not found
      */
     _resolveConstraint(parameterName, deploymentTarget) {

package/src/lib/prompt-runner.js CHANGED Viewed

@@ -81,9 +81,9 @@ export default class PromptRunner {
         const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
         const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, regionPreviousAnswers, explicitConfig, existingConfig);
-        // 1b. Instance type — query MCP and prompt for managed-inference, async-inference, batch-transform, and hyperpod-eks
+        // 1b. Instance type — query MCP and prompt for realtime-inference, async-inference, batch-transform, and hyperpod-eks
         let instanceAnswers = {};
-        if (regionAndTargetAnswers.deploymentTarget === 'managed-inference' ||
+        if (regionAndTargetAnswers.deploymentTarget === 'realtime-inference' ||
             regionAndTargetAnswers.deploymentTarget === 'async-inference' ||
             regionAndTargetAnswers.deploymentTarget === 'batch-transform' ||
             regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
@@ -521,7 +521,18 @@ export default class PromptRunner {
         // First, add any existing config values to previousAnswers so they're available for defaults
         const allPreviousAnswers = { ...existingConfig, ...previousAnswers };
-        return await this._runPrompts(promptablePrompts.map(prompt => ({
+        // Collect explicit values for prompts that will be skipped.
+        // When a prompt is skipped because its value is in explicitConfig,
+        // the prompt library won't include it in the returned answers.
+        // Downstream code expects the value to be present, so we inject it.
+        const skippedValues = {};
+        for (const prompt of promptablePrompts) {
+            if (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) {
+                skippedValues[prompt.name] = explicitConfig[prompt.name];
+            }
+        }
+        const promptedAnswers = await this._runPrompts(promptablePrompts.map(prompt => ({
             ...prompt,
             // Wrap message to inject previousAnswers so prompts can access _mcpInstanceChoices etc.
             message: typeof prompt.message === 'function' ? (answers) => {
@@ -541,14 +552,34 @@ export default class PromptRunner {
             } : (existingConfig[prompt.name] !== undefined && existingConfig[prompt.name] !== null) ?
                 existingConfig[prompt.name] : undefined,
             // Skip prompt ONLY if we have explicit config (not defaults)
+            // In auto-prompt mode, also skip optional prompts (not required in parameter matrix)
             when: prompt.when ? (answers) => {
                 // Skip if we have the value from explicit config (CLI, env vars, config files)
                 if (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) {
                     return false;
                 }
+                // In auto-prompt mode, skip optional/non-matrix parameters entirely
+                if (this.configManager?.isAutoPrompt()) {
+                    const paramConfig = this.configManager.parameterMatrix[prompt.name];
+                    // Skip if not in matrix (supplementary prompt) or if optional
+                    if (!paramConfig || !paramConfig.required) {
+                        return false;
+                    }
+                }
                 return prompt.when({...allPreviousAnswers, ...answers});
-            } : (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) ?
-                () => false : undefined,
+            } : (_answers) => {
+                // No original when condition — skip if explicit or if auto-prompt + optional/non-matrix
+                if (explicitConfig[prompt.name] !== undefined && explicitConfig[prompt.name] !== null) {
+                    return false;
+                }
+                if (this.configManager?.isAutoPrompt()) {
+                    const paramConfig = this.configManager.parameterMatrix[prompt.name];
+                    if (!paramConfig || !paramConfig.required) {
+                        return false;
+                    }
+                }
+                return true;
+            },
             // Provide access to previous answers for conditional logic
             // For unbounded parameters, inject MCP-provided choices if available
             choices: prompt.choices ? (answers) => {
@@ -563,6 +594,9 @@ export default class PromptRunner {
                 return prompt.choices;
             } : undefined
         })));
+        // Merge skipped explicit values into the answers so downstream code sees them
+        return { ...skippedValues, ...promptedAnswers };
     }
     /**
@@ -638,7 +672,7 @@ export default class PromptRunner {
     /**
      * Query MCP instance-recommender server after deployment target is known.
-     * Only runs when deploymentTarget is managed-inference.
+     * Only runs when deploymentTarget is realtime-inference.
      * Populates configManager.mcpChoices so _runPhase injects them into list prompts.
      * @private
      */
@@ -1314,12 +1348,22 @@ export default class PromptRunner {
             return inferenceAmiVersion ? { cudaVersion, inferenceAmiVersion } : null;
         }
-        // Multiple options — let the user choose
+        // Multiple options — let the user choose (or auto-select in auto-prompt mode)
         const defaultVersion = frameworkAccel?.version
             && compatibleVersions.includes(frameworkAccel.version)
             ? frameworkAccel.version
             : instanceInfo.accelerator.default || compatibleVersions[compatibleVersions.length - 1];
+        // In auto-prompt mode, auto-select the default without prompting
+        if (this.configManager?.isAutoPrompt()) {
+            const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[defaultVersion];
+            if (inferenceAmiVersion) {
+                console.log(`\n🔧 CUDA ${defaultVersion} auto-selected (auto-prompt mode)`);
+                console.log(`   AMI: ${inferenceAmiVersion}`);
+            }
+            return inferenceAmiVersion ? { cudaVersion: defaultVersion, inferenceAmiVersion } : null;
+        }
         const choices = compatibleVersions.map(v => {
             const ami = PromptRunner.CUDA_AMI_MAP[v] || 'unknown';
             const isDefault = v === defaultVersion ? ' (recommended)' : '';

package/src/lib/prompts.js CHANGED Viewed

@@ -649,7 +649,7 @@ const modulePrompts = [
 /**
  * Infrastructure prompts split into sub-phases so the prompt runner can
  * interleave MCP queries between them (e.g. query instance-recommender
- * only after we know the deployment target is managed-inference).
+ * only after we know the deployment target is realtime-inference).
  *
  * Ordering: Region → Deployment Target → Instance/HyperPod → Build Target → Role
  */
@@ -683,21 +683,21 @@ const infraRegionAndTargetPrompts = [
         name: 'deploymentTarget',
         message: 'Deployment target?',
         choices: [
-            { name: 'SageMaker Managed Inference - Real Time', value: 'managed-inference' },
-            { name: 'SageMaker Managed Inference - Async', value: 'async-inference' },
-            { name: 'SageMaker Managed Inference - Batch', value: 'batch-transform' },
+            { name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
+            { name: 'SageMaker Async Inference', value: 'async-inference' },
+            { name: 'SageMaker Batch Transform', value: 'batch-transform' },
             { name: 'SageMaker HyperPod - EKS', value: 'hyperpod-eks' }
         ],
-        default: 'managed-inference'
+        default: 'realtime-inference'
     }
 ];
-// Sub-phase B: Instance type (only when deploymentTarget === 'managed-inference')
+// Sub-phase B: Instance type (only when deploymentTarget === 'realtime-inference')
 const infraInstancePrompts = [
     {
         type: 'list',
         name: 'instanceType',
-        when: answers => answers.deploymentTarget === 'managed-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks',
+        when: answers => answers.deploymentTarget === 'realtime-inference' || answers.deploymentTarget === 'async-inference' || answers.deploymentTarget === 'batch-transform' || answers.deploymentTarget === 'hyperpod-eks',
         message: (answers) => {
             const framework = answers.framework || answers.deploymentConfig?.split('-')[0];

package/src/lib/template-manager.js CHANGED Viewed

@@ -64,7 +64,7 @@ export default class TemplateManager {
                 'diffusors-vllm-omni'
             ],
             buildTargets: ['codebuild'],
-            deploymentTargets: ['managed-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
+            deploymentTargets: ['realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
             testTypes: ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
             awsRegions: [
                 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
@@ -135,7 +135,7 @@ export default class TemplateManager {
         // Validate batch transform specific fields
         this._validateBatchTransformConfig();
-        // Validate instance type format (ml.*.*) - only for managed-inference
+        // Validate instance type format (ml.*.*) - only for realtime-inference
         if (this.answers.instanceType && this.answers.instanceType !== 'custom') {
             const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
             if (!instancePattern.test(this.answers.instanceType)) {

package/templates/do/clean CHANGED Viewed

@@ -24,7 +24,7 @@ done
 # Function to display usage
 show_usage() {
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
     echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
 <% } else if (deploymentTarget === 'async-inference') { %>
     echo "Usage: ./do/clean [local|ecr|endpoint|codebuild|all]"
@@ -37,7 +37,7 @@ show_usage() {
     echo "Cleanup targets:"
     echo "  local     - Remove local Docker images"
     echo "  ecr       - Remove images from Amazon ECR"
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
     echo "  endpoint  - Delete SageMaker endpoint, configuration, and model"
 <% } else if (deploymentTarget === 'async-inference') { %>
     echo "  endpoint  - Delete SageMaker async endpoint, configuration, and inference component"
@@ -51,7 +51,7 @@ show_usage() {
     echo ""
     echo "Examples:"
     echo "  ./do/clean local      # Remove local Docker images only"
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
     echo "  ./do/clean endpoint   # Delete SageMaker resources only"
 <% } else if (deploymentTarget === 'async-inference') { %>
     echo "  ./do/clean endpoint   # Delete SageMaker async resources only"
@@ -198,7 +198,7 @@ clean_ecr() {
     fi
 }
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 # Function to clean SageMaker endpoint and inference components
 clean_endpoint() {
     echo "🧹 Cleaning SageMaker resources"
@@ -720,7 +720,7 @@ case "${CLEANUP_TARGET}" in
     ecr)
         clean_ecr
         ;;
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
     endpoint)
         clean_endpoint
         ;;
@@ -761,7 +761,7 @@ case "${CLEANUP_TARGET}" in
         echo ""
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
         # Clean SageMaker resources
         if clean_endpoint; then
             CLEANED_ITEMS+=("SageMaker resources")

package/templates/do/config CHANGED Viewed

@@ -24,8 +24,8 @@ export CODEBUILD_PROJECT_NAME="${PROJECT_NAME}-build-$(date +%Y%m%d)"
 # Deployment configuration — WHERE the model runs
 export DEPLOYMENT_TARGET="<%= deploymentTarget %>"
-<% if (deploymentTarget === 'managed-inference') { %>
-# SageMaker Managed Inference configuration
+<% if (deploymentTarget === 'realtime-inference') { %>
+# SageMaker Real-Time Inference configuration
 export INSTANCE_TYPE="<%= instanceType %>"
 <% if (inferenceAmiVersion) { %>
 export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
@@ -33,7 +33,7 @@ export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
 <% } %>
 <% if (deploymentTarget === 'async-inference') { %>
-# SageMaker Managed Inference - Async configuration
+# SageMaker Async Inference configuration
 export INSTANCE_TYPE="<%= instanceType %>"
 <% if (inferenceAmiVersion) { %>
 export INFERENCE_AMI_VERSION="<%= inferenceAmiVersion %>"
@@ -77,7 +77,7 @@ export FSX_VOLUME_HANDLE="<%= fsxVolumeHandle %>"
 <% } %>
 <% if (deploymentTarget === 'batch-transform') { %>
-# SageMaker Managed Inference - Batch configuration
+# SageMaker Batch Transform configuration
 export INSTANCE_TYPE="<%= instanceType %>"
 # Resolve AWS account ID at runtime for default resource names
@@ -187,7 +187,7 @@ export BASE_IMAGE=${BASE_IMAGE:-<%= baseImage %>}
 # Allow environment variable overrides
 export AWS_REGION=${AWS_REGION:-<%= awsRegion %>}
-<% if (deploymentTarget === 'managed-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
+<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
 export INSTANCE_TYPE=${INSTANCE_TYPE:-<%= instanceType %>}
 <% } %>
 export ECR_REPOSITORY_NAME=${ECR_REPOSITORY_NAME:-ml-container-creator}
@@ -241,7 +241,7 @@ echo "   Model env vars: <%= Object.keys(modelEnvVars).length %>"
 <% if (typeof serverEnvVars !== 'undefined' && serverEnvVars && Object.keys(serverEnvVars).length > 0) { %>
 echo "   Server env vars: <%= Object.keys(serverEnvVars).length %>"
 <% } %>
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 echo "   Instance: ${INSTANCE_TYPE}"
 <% } else if (deploymentTarget === 'async-inference') { %>
 echo "   Instance: ${INSTANCE_TYPE}"

package/templates/do/deploy CHANGED Viewed

@@ -36,7 +36,7 @@ echo "   Deployment config: ${DEPLOYMENT_CONFIG}"
 echo "   Region: ${AWS_REGION}"
 echo "   Build target: ${BUILD_TARGET}"
 echo "   Deployment target: ${DEPLOYMENT_TARGET}"
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 echo "   Instance type: ${INSTANCE_TYPE}"
 <% } else if (deploymentTarget === 'async-inference') { %>
 echo "   Instance type: ${INSTANCE_TYPE}"
@@ -95,9 +95,9 @@ fi
 echo "✅ ECR image found: ${ECR_REPOSITORY}:${PROJECT_NAME}-latest"
 IMAGE_TAG="${PROJECT_NAME}-latest"
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 # ============================================================
-# SageMaker Managed Inference Deployment (Inference Components)
+# SageMaker Real-Time Inference Deployment (Inference Components)
 # ============================================================
 # Validate execution role ARN
@@ -520,7 +520,7 @@ echo "   ./do/clean endpoint"
 <% } else if (deploymentTarget === 'async-inference') { %>
 # ============================================================
-# SageMaker Managed Inference - Async Deployment (Model-Based)
+# SageMaker Async Inference Deployment (Model-Based)
 # SageMaker async inference does NOT support Inference Components.
 # Flow: create-model → create-endpoint-config (with AsyncInferenceConfig) → create-endpoint
 # ============================================================
@@ -1151,7 +1151,7 @@ _update_config_var "KUBECONFIG" "${KUBECONFIG_PATH}"
 <% } else if (deploymentTarget === 'batch-transform') { %>
 # ============================================================
-# SageMaker Managed Inference - Batch Deployment
+# SageMaker Batch Transform Deployment
 # Flow: create-model → create-transform-job → poll until completion
 # ============================================================

package/templates/do/export CHANGED Viewed

@@ -42,8 +42,8 @@ if [ "${1:-}" = "--json" ]; then
     # Deployment target
     JSON="${JSON},\"deploymentTarget\":\"${DEPLOYMENT_TARGET}\""
-<% if (deploymentTarget === 'managed-inference') { %>
-    # SageMaker Managed Inference
+<% if (deploymentTarget === 'realtime-inference') { %>
+    # SageMaker Real-Time Inference
     JSON="${JSON},\"instanceType\":\"${INSTANCE_TYPE}\""
 <% } else if (deploymentTarget === 'async-inference') { %>
     # SageMaker Async Inference
@@ -206,11 +206,11 @@ fi
 # Deployment target
 CMD="${CMD} --deployment-target=${DEPLOYMENT_TARGET}"
-<% if (deploymentTarget === 'managed-inference') { %>
-# SageMaker Managed Inference
+<% if (deploymentTarget === 'realtime-inference') { %>
+# SageMaker Real-Time Inference
 CMD="${CMD} --instance-type=${INSTANCE_TYPE}"
 <% } else if (deploymentTarget === 'batch-transform') { %>
-# SageMaker Managed Inference - Batch
+# SageMaker Batch Transform
 CMD="${CMD} --instance-type=${INSTANCE_TYPE}"
 CMD="${CMD} --batch-input-path=${BATCH_INPUT_PATH}"
 CMD="${CMD} --batch-output-path=${BATCH_OUTPUT_PATH}"

package/templates/do/logs CHANGED Viewed

@@ -10,9 +10,9 @@ set -o pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 # ============================================================
-# SageMaker Managed Inference Logs (CloudWatch)
+# SageMaker Real-Time Inference Logs (CloudWatch)
 # ============================================================
 # Allow inference component name as argument or from config
@@ -95,7 +95,7 @@ aws logs tail "${LOG_GROUP}" \
 <% } else if (deploymentTarget === 'async-inference') { %>
 # ============================================================
-# SageMaker Managed Inference - Async Logs (CloudWatch)
+# SageMaker Async Inference Logs (CloudWatch)
 # ============================================================
 ENDPOINT="${1:-${ENDPOINT_NAME:-}}"
@@ -166,7 +166,7 @@ aws logs tail "${LOG_GROUP}" \
 <% } else if (deploymentTarget === 'batch-transform') { %>
 # ============================================================
-# SageMaker Managed Inference - Batch Logs (CloudWatch)
+# SageMaker Batch Transform Logs (CloudWatch)
 # ============================================================
 # Allow transform job name as argument or from config

package/templates/do/register CHANGED Viewed

@@ -266,7 +266,7 @@ echo "   Backend:           ${BACKEND}"
 <% if (framework === 'transformers') { %>
 echo "   Model name:        ${MODEL_NAME:-N/A}"
 <% } %>
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 echo "   Instance type:     ${INSTANCE_TYPE}"
 <% } else if (deploymentTarget === 'batch-transform') { %>
 echo "   Instance:          ${INSTANCE_TYPE} x ${BATCH_INSTANCE_COUNT}"
@@ -498,7 +498,7 @@ DJEOF
     if [ "${CI_MODE}" = true ]; then
         echo ""
         echo "⚠️  CI Integration is experimental and currently only tested for"
-        echo "   SageMaker Managed Inference — Real Time endpoints."
+        echo "   SageMaker Real-Time Inference endpoints."
         echo ""
         # Compute configId
@@ -542,7 +542,7 @@ if [ -n "${MODEL_NAME:-}" ]; then
 fi
 <% } %>
-<% if (deploymentTarget === 'managed-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
+<% if (deploymentTarget === 'realtime-inference' || deploymentTarget === 'async-inference' || deploymentTarget === 'batch-transform') { %>
 CMD_ARGS+=("--instance-type" "${INSTANCE_TYPE}")
 <% } %>

package/templates/do/test CHANGED Viewed

@@ -10,9 +10,9 @@ set -o pipefail
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 source "${SCRIPT_DIR}/config"
-<% if (deploymentTarget === 'managed-inference') { %>
+<% if (deploymentTarget === 'realtime-inference') { %>
 # ============================================================
-# SageMaker Managed Inference Testing
+# SageMaker Real-Time Inference Testing
 # ============================================================
 # Parse arguments
@@ -295,7 +295,7 @@ fi
 <% } else if (deploymentTarget === 'async-inference') { %>
 # ============================================================
-# SageMaker Managed Inference - Async Testing
+# SageMaker Async Inference Testing
 # ============================================================
 # Parse arguments
@@ -875,7 +875,7 @@ fi
 <% } else if (deploymentTarget === 'batch-transform') { %>
 # ============================================================
-# SageMaker Managed Inference - Batch Testing
+# SageMaker Batch Transform Testing
 # ============================================================
 # Parse arguments: local or batch test mode