npm - @aws/ml-container-creator - Versions diffs - 1.0.2 → 1.0.4 - Mend

@aws/ml-container-creator 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/README.md +1 -1
package/bin/cli.js +1 -1
package/config/tune-catalog.json +303 -1
package/infra/ci-harness/lib/ci-harness-stack.ts +43 -0
package/package.json +3 -2
package/servers/base-image-picker/index.js +65 -18
package/servers/instance-sizer/index.js +32 -0
package/servers/lib/catalogs/fleet-drivers.json +38 -0
package/servers/lib/catalogs/model-arch-support.json +51 -0
package/servers/lib/catalogs/model-servers.json +2842 -1516
package/servers/lib/schemas/image-catalog.schema.json +12 -0
package/src/app.js +6 -4
package/src/lib/bootstrap-command-handler.js +12 -2
package/src/lib/bootstrap-profile-manager.js +16 -0
package/src/lib/cross-cutting-checker.js +6 -1
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +110 -3
package/src/lib/prompt-runner.js +66 -22
package/src/lib/template-variable-resolver.js +8 -0
package/src/lib/train-config-builder.js +339 -0
package/templates/do/.benchmark_writer.py +3 -0
package/templates/do/.eval_helper.py +409 -0
package/templates/do/.register_helper.py +185 -11
package/templates/do/.train_build_request.py +102 -113
package/templates/do/.train_helper.py +433 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +157 -0
package/templates/do/benchmark +60 -3
package/templates/do/deploy.d/managed-inference.ejs +83 -0
package/templates/do/evaluate +272 -0
package/templates/do/lib/resolve-instance.sh +155 -0
package/templates/do/register +5 -0
package/templates/do/test +1 -0
package/templates/do/train +879 -126
package/templates/do/training/config.yaml +83 -11
package/templates/do/training/dpo/accelerate_config.yaml +24 -0
package/templates/do/training/dpo/defaults.yaml +26 -0
package/templates/do/training/dpo/prompts.json +8 -0
package/templates/do/training/dpo/train.py +363 -0
package/templates/do/training/sft/accelerate_config.yaml +22 -0
package/templates/do/training/sft/defaults.yaml +18 -0
package/templates/do/training/sft/prompts.json +7 -0
package/templates/do/training/sft/train.py +310 -0
package/templates/do/tune +11 -2
package/templates/do/.train_poll_parser.py +0 -135
package/templates/do/.train_status_parser.py +0 -187
package/templates/do/training/{train.py → custom/train.py} +0 -0

package/servers/lib/schemas/image-catalog.schema.json CHANGED Viewed

@@ -159,6 +159,18 @@
                     "items": {
                         "type": "string"
                     }
+                },
+                "min_driver_version": {
+                    "type": "string",
+                    "description": "Minimum GPU driver version required (e.g., '550.54')"
+                },
+                "cuda_toolkit": {
+                    "type": "string",
+                    "description": "CUDA toolkit version bundled in the image (e.g., '12.4')"
+                },
+                "transformers_version": {
+                    "type": "string",
+                    "description": "Bundled transformers library version (e.g., '4.52.0')"
                 }
             },
             "additionalProperties": false

package/src/app.js CHANGED Viewed

@@ -366,10 +366,11 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
     const trainIncluded = answers.deploymentTarget !== 'batch-transform';
     if (!trainIncluded) {
         ignorePatterns.push('**/do/train');
+        ignorePatterns.push('**/do/.train_helper.py');
         ignorePatterns.push('**/do/.train_build_request.py');
-        ignorePatterns.push('**/do/.train_status_parser.py');
-        ignorePatterns.push('**/do/.train_poll_parser.py');
         ignorePatterns.push('**/do/training/**');
+        ignorePatterns.push('**/do/evaluate');
+        ignorePatterns.push('**/do/.eval_helper.py');
     }
     // Exclude feedback.sh when neither tune nor train is included
@@ -404,10 +405,11 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/do/.adapter_helper.py');
         ignorePatterns.push('**/do/.register_helper.py');
         ignorePatterns.push('**/do/train');
+        ignorePatterns.push('**/do/.train_helper.py');
         ignorePatterns.push('**/do/.train_build_request.py');
-        ignorePatterns.push('**/do/.train_status_parser.py');
-        ignorePatterns.push('**/do/.train_poll_parser.py');
         ignorePatterns.push('**/do/training/**');
+        ignorePatterns.push('**/do/evaluate');
+        ignorePatterns.push('**/do/.eval_helper.py');
         ignorePatterns.push('**/do/add-ic');
         ignorePatterns.push('**/do/run');
         ignorePatterns.push('**/sample_model/**');

package/src/lib/bootstrap-command-handler.js CHANGED Viewed

@@ -64,6 +64,7 @@ export default class BootstrapCommandHandler {
     _handlePrune() { return this.profileManager._handlePrune(); }
     _handleSyncSchemas() { return this.profileManager._handleSyncSchemas(); }
     _handleSyncModelFamilies() { return this.profileManager._handleSyncModelFamilies(); }
+    _handleSyncServingVersions() { return this.profileManager._handleSyncServingVersions(); }
     /**
      * Dispatch bootstrap subcommands.
@@ -132,6 +133,9 @@ export default class BootstrapCommandHandler {
         case 'sync-model-families':
             await this._handleSyncModelFamilies();
             break;
+        case 'sync-serving-versions':
+            await this._handleSyncServingVersions();
+            break;
         // Migration path: upgrades legacy profiles to current naming conventions.
         // Corrects stackName to mlcc-bootstrap-{profileName}, renames sharedStackFrom
         // to sharedInfraFrom. Idempotent — safe to run multiple times.
@@ -1467,7 +1471,9 @@ SUBCOMMANDS:
   prune                               Remove deleted and unknown records from the deployment manifest
   update                              Re-deploy bootstrap stacks using active profile (no prompts)
   migrate                             Upgrade legacy profiles to current naming conventions
+  sync-schemas                        Download AWS service model schemas (sagemaker, iam, ecr, s3)
   sync-model-families                 Discover tune-eligible models from JumpStart Hub and update catalog
+  sync-serving-versions               Discover latest vLLM/SGLang/TRT-LLM image versions and update catalog
 SETUP OPTIONS:
   --non-interactive                   Run without interactive prompts
@@ -1477,8 +1483,10 @@ SETUP OPTIONS:
   --role-arn <arn>                    Use existing IAM role ARN (skip role creation)
   --skip-s3                           Skip S3 bucket creation
   --ci                                Provision CI testing infrastructure
+  --benchmark-infra                   Provision Athena/Glue benchmark infrastructure (requires --ci)
   --skip-ci                           Skip CI infrastructure provisioning
   --skip-post-setup                   Skip post-setup chain (mcp init, sync-architectures, sync-schemas)
+  --ignore-staleness                  Suppress schema staleness warnings
 STATUS OPTIONS:
   --verify                            Check each active resource against AWS APIs for drift detection
@@ -1495,13 +1503,15 @@ EXAMPLES:
   ml-container-creator bootstrap list
   ml-container-creator bootstrap remove dev
   ml-container-creator bootstrap remove dev --force --delete-stack
+  ml-container-creator bootstrap update
+  ml-container-creator bootstrap update --ci --benchmark-infra
   ml-container-creator bootstrap scan
+  ml-container-creator bootstrap sync-schemas
   ml-container-creator bootstrap sync-model-families
+  ml-container-creator bootstrap sync-serving-versions
   ml-container-creator bootstrap migrate
   ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2
-  ml-container-creator bootstrap --non-interactive --profile my-aws-profile --role-arn arn:aws:iam::123456789012:role/MyRole --skip-s3
   ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2 --ci
-  ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2 --skip-ci
 `);
     }

package/src/lib/bootstrap-profile-manager.js CHANGED Viewed

@@ -655,4 +655,20 @@ export default class BootstrapProfileManager {
             process.exit(1);
         }
     }
+    /**
+     * Handle sync-serving-versions subcommand: discover latest container image
+     * versions for vLLM, SGLang, and TensorRT-LLM and update the model-servers catalog.
+     *
+     * Flow:
+     *   1. Print a banner.
+     *   2. Lazily import `syncServingVersions` from
+     *      `../../scripts/sync-serving-versions.js` (loaded only when this
+     *      subcommand actually runs).
+     *   3. Await it and report the `totalAdded` / `totalRemoved` counts taken
+     *      from its result object.
+     *
+     * Any error raised inside the try block (including a failed dynamic import)
+     * is reported via its `message` and the process is terminated with exit
+     * code 1 instead of being rethrown to the caller.
+     *
+     * NOTE(review): the discovery/catalog-update work itself lives in the
+     * imported script, which is not visible here — confirm its result always
+     * carries `totalAdded` and `totalRemoved`.
+     *
+     * @returns {Promise<void>} Resolves after the summary line is printed.
+     */
+    async _handleSyncServingVersions() {
+        console.log('\n🔄 Sync Serving Versions — Discovering latest container images...\n');
+        try {
+            const { syncServingVersions } = await import('../../scripts/sync-serving-versions.js');
+            const result = await syncServingVersions();
+            console.log(`\n✅ Sync complete: ${result.totalAdded} new, ${result.totalRemoved} pruned\n`);
+        } catch (err) {
+            console.log(`❌ Sync failed: ${err.message}`);
+            process.exit(1);
+        }
+    }
 }

package/src/lib/cross-cutting-checker.js CHANGED Viewed

@@ -290,7 +290,12 @@ export default class CrossCuttingChecker {
         if (!modelType || !server || !serverVersion) return findings;
         const entries = modelServersCatalog[server] || [];
-        const entry = entries.find(e => e.labels?.framework_version === serverVersion);
+        // Try exact version match first, then fall back to nearest entry with supportedModelTypes
+        let entry = entries.find(e => e.labels?.framework_version === serverVersion);
+        if (!entry?.supportedModelTypes?.length) {
+            // Fall back to any entry that has supportedModelTypes populated
+            entry = entries.find(e => e.supportedModelTypes?.length > 0);
+        }
         if (!entry?.supportedModelTypes?.length) return findings;
         if (!entry.supportedModelTypes.includes(modelType.toLowerCase())) {

package/src/lib/generated/cli-options.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-23T20:55:23.381Z
+// Generated: 2026-06-30T16:45:56.916Z
 /**
  * CLI option definitions derived from parameter-schema-v2.json.

package/src/lib/generated/parameter-matrix.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-23T20:55:23.482Z
+// Generated: 2026-06-30T16:45:57.021Z
 /**
  * Parameter matrix defining how each parameter is loaded from various sources.

package/src/lib/generated/validation-rules.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-23T20:55:23.412Z
+// Generated: 2026-06-30T16:45:56.949Z
 /**
  * Validation rules derived from parameter-schema-v2.json.

package/src/lib/mcp-query-runner.js CHANGED Viewed

@@ -384,6 +384,9 @@ export default class McpQueryRunner {
                 const endpointNames = result.choices.endpointName;
                 const metadata = result.metadata || {};
+                // Store endpoint metadata for later instance type resolution (US-1)
+                this.runner._endpointPickerMetadata = metadata;
                 // Build choices with metadata annotations
                 this.runner._mcpEndpointChoices = endpointNames.map(name => {
                     const meta = metadata[name];
@@ -412,12 +415,15 @@ export default class McpQueryRunner {
     }
     /**
-     * Query MCP base-image-picker server after deployment config is selected.
+     * Query MCP base-image-picker server after deployment config and instance type are known.
      * Populates _mcpBaseImageChoices for the base image selection prompt.
-     * Requirements: 5.1, 5.2, 5.3, 5.4, 9.1, 9.2, 9.3
+     * Requirements: 5.1, 5.2, 5.3, 5.4, 9.1, 9.2, 9.3, US-1 (ordering constraint)
+     * @param {Object} frameworkAnswers - Framework/architecture answers
+     * @param {Object} _explicitConfig - Explicit CLI/config values
+     * @param {Object} [infraContext] - Infrastructure context (instanceType, tensorParallelSize, modelId)
      * @private
      */
-    async _queryMcpForBaseImage(frameworkAnswers, _explicitConfig) {
+    async _queryMcpForBaseImage(frameworkAnswers, _explicitConfig, infraContext = {}) {
         // Skip if base image provided via CLI --base-image flag
         if (this.runner.options['base-image']) return;
@@ -454,6 +460,17 @@ export default class McpQueryRunner {
             context.searchCriteria = searchCriteria.trim();
         }
+        // Pass infrastructure context for driver-aware filtering (US-1 ordering constraint)
+        if (infraContext.instanceType) {
+            context.instanceType = infraContext.instanceType;
+        }
+        if (infraContext.tensorParallelSize !== null && infraContext.tensorParallelSize !== undefined) {
+            context.tensorParallelSize = infraContext.tensorParallelSize;
+        }
+        if (infraContext.modelId) {
+            context.modelId = infraContext.modelId;
+        }
         const result = await cm.queryMcpServer('base-image-picker', context);
         if (result && result.metadata?.baseImage?.length > 0) {
@@ -716,6 +733,96 @@ export default class McpQueryRunner {
         }
     }
+    /**
+     * Resolve instance type from an existing endpoint.
+     * Priority:
+     *   1. Endpoint-picker metadata (already fetched, no network call)
+     *   2. Direct AWS SDK call: DescribeEndpoint → DescribeEndpointConfig
+     *
+     * Reuses the resolution pattern from do/lib/resolve-instance.sh:
+     *   - Check ProductionVariants[0].CurrentInstanceType or InstanceType
+     *   - Fallback: DescribeEndpointConfig → ProductionVariants[0].InstanceType
+     *   - Final fallback: InstancePools[0] (highest priority)
+     *
+     * Details of the two strategies as implemented here:
+     *   - Metadata path: reads `this.runner._endpointPickerMetadata[endpointName].instanceType`,
+     *     strips a trailing " (pool: ...)" annotation, and ignores the placeholder
+     *     value 'unknown' (falling through to the SDK path in that case).
+     *   - SDK path: the region is `awsRegion`, else `process.env.AWS_REGION`, else
+     *     'us-east-1'. A named profile is taken from the config manager, the
+     *     `profile` option, or `process.env.AWS_PROFILE`; if the
+     *     `@aws-sdk/credential-providers` import fails, no explicit credentials
+     *     are set on the client. InstancePools are only consulted on the
+     *     DescribeEndpointConfig response, and only when that variant has no
+     *     InstanceType.
+     *
+     * Error handling: everything inside the SDK path is wrapped in try/catch;
+     * failures are logged and reported as `null` so callers can skip
+     * driver-aware filtering. The metadata path runs outside that try block.
+     * NOTE(review): assumes `meta.instanceType` is a string when present — confirm
+     * against the endpoint-picker server's metadata shape.
+     *
+     * Progress and outcome messages are written with console.log.
+     *
+     * Requirements: US-1 (ordering constraint — resolve instance type before base image picker)
+     * @param {string} endpointName - The existing endpoint name
+     * @param {string} awsRegion - AWS region for API calls (falsy values fall back as described above)
+     * @returns {Promise<string|null>} Resolved instance type or null on failure
+     * @private
+     */
+    async _resolveEndpointInstanceType(endpointName, awsRegion) {
+        // Strategy 1: Use endpoint-picker metadata (already fetched, no network call)
+        if (this.runner._endpointPickerMetadata) {
+            const meta = this.runner._endpointPickerMetadata[endpointName];
+            if (meta?.instanceType) {
+                // Strip pool annotation if present: "ml.g5.12xlarge (pool: 3 types)" → "ml.g5.12xlarge"
+                const rawInstanceType = meta.instanceType.includes(' (pool:')
+                    ? meta.instanceType.split(' (pool:')[0]
+                    : meta.instanceType;
+                if (rawInstanceType && rawInstanceType !== 'unknown') {
+                    console.log(`   ✓ Resolved instance type from endpoint metadata: ${rawInstanceType}`);
+                    return rawInstanceType;
+                }
+            }
+        }
+        // Strategy 2: Direct AWS SDK call (for custom endpoint names not in picker results, or when metadata was unusable)
+        console.log('   🔍 Resolving instance type from existing endpoint...');
+        try {
+            const { SageMakerClient, DescribeEndpointCommand, DescribeEndpointConfigCommand } = await import('@aws-sdk/client-sagemaker');
+            const region = awsRegion || process.env.AWS_REGION || 'us-east-1';
+            const clientOptions = { region };
+            // Use AWS profile if available (config manager value wins, then the profile option, then AWS_PROFILE)
+            const awsProfile = this.runner.configManager?.config?.awsProfile
+                || this.runner.options?.profile || process.env.AWS_PROFILE || null;
+            if (awsProfile) {
+                try {
+                    const { fromIni } = await import('@aws-sdk/credential-providers');
+                    clientOptions.credentials = fromIni({ profile: awsProfile });
+                } catch {
+                    // credential-providers not available, use default chain
+                }
+            }
+            const client = new SageMakerClient(clientOptions);
+            // DescribeEndpoint — check only the first ProductionVariant for an instance type
+            const epResponse = await client.send(new DescribeEndpointCommand({ EndpointName: endpointName }));
+            const primaryVariant = (epResponse.ProductionVariants || [])[0] || {};
+            let instanceType = primaryVariant.CurrentInstanceType || primaryVariant.InstanceType || null;
+            // Fallback: DescribeEndpointConfig (only when the endpoint itself yielded nothing)
+            if (!instanceType && epResponse.EndpointConfigName) {
+                const ecResponse = await client.send(
+                    new DescribeEndpointConfigCommand({ EndpointConfigName: epResponse.EndpointConfigName })
+                );
+                const ecVariant = (ecResponse.ProductionVariants || [])[0];
+                if (ecVariant?.InstanceType) {
+                    instanceType = ecVariant.InstanceType;
+                } else if (ecVariant?.InstancePools?.length > 0) {
+                    // Use highest-priority pool entry (lowest Priority number); sorts a copy, and a missing or 0 Priority is treated as 99 — NOTE(review): confirm 0 is never a valid Priority
+                    const sorted = [...ecVariant.InstancePools].sort((a, b) => (a.Priority || 99) - (b.Priority || 99));
+                    instanceType = sorted[0].InstanceType || null;
+                }
+            }
+            if (instanceType) {
+                console.log(`   ✓ Resolved instance type from endpoint: ${instanceType}`);
+                return instanceType;
+            }
+            console.log('   ↳ Could not determine instance type from endpoint');
+            return null;
+        } catch (err) {
+            // Graceful fallback: if AWS call fails, skip filtering (no driver-aware filter)
+            console.log(`   ⚠️  Could not resolve instance type from endpoint: ${err.message}`);
+            return null;
+        }
+    }
     /**
      * Validate and display instance type compatibility
      * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6

package/src/lib/prompt-runner.js CHANGED Viewed

@@ -68,6 +68,7 @@ export default class PromptRunner {
     _queryMcpForInstance(...args) { return this.mcpQueryRunner._queryMcpForInstance(...args); }
     _queryMcpForInstanceSizing(...args) { return this.mcpQueryRunner._queryMcpForInstanceSizing(...args); }
     _queryMcpForEndpoints(...args) { return this.mcpQueryRunner._queryMcpForEndpoints(...args); }
+    _resolveEndpointInstanceType(...args) { return this.mcpQueryRunner._resolveEndpointInstanceType(...args); }
     _queryMcpForHyperPod(...args) { return this.mcpQueryRunner._queryMcpForHyperPod(...args); }
     _fetchAndDisplayModelInfo(...args) { return this.mcpQueryRunner._fetchAndDisplayModelInfo(...args); }
     _validateAndDisplayInstanceType(...args) { return this.mcpQueryRunner._validateAndDisplayInstanceType(...args); }
@@ -182,8 +183,8 @@ export default class PromptRunner {
         }
         // ══════════════════════════════════════════════════════════════════════
-        // Phase 2 — How (deployment target + serving profile + base image)
-        // Requirements: 4.3 — instance prompt appears AFTER base image is known
+        // Phase 2 — How (deployment target + serving profile)
+        // Requirements: US-1 — base image selection moved AFTER instance resolution
         // ══════════════════════════════════════════════════════════════════════
         console.log('\n💪 Infrastructure & Deployment');
@@ -192,25 +193,8 @@ export default class PromptRunner {
         const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
         const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
-        // 2b. Query base-image-picker MCP server for base image choices
-        await this.mcpQueryRunner._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
-        const baseImagePreviousAnswers = {
-            ...frameworkAnswers,
-            ...engineAnswers,
-            ...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
-        };
-        const baseImageAnswers = await this._runPhase(
-            baseImagePrompts,
-            baseImagePreviousAnswers,
-            explicitConfig,
-            existingConfig
-        );
-        // Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
-        this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
-        // Extract CUDA version from selected base image for instance-sizer context
-        const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
+        // NOTE: Base image selection moved to Phase 3 (after instance type resolution)
+        // to enable driver-aware filtering. See US-1 ordering constraint in requirements.
         // ══════════════════════════════════════════════════════════════════════
         // Phase 3 — Where (region + instance [derived] + CUDA/AMI + HyperPod + build target)
@@ -283,7 +267,7 @@ export default class PromptRunner {
                 } else if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
                     // Query instance-sizer with full context
                     await this.mcpQueryRunner._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
-                        cudaVersion: selectedBaseImageCuda,
+                        cudaVersion: null, // base image not yet selected (moved after instance resolution)
                         profileEnvVars: this._selectedProfileEnvVars || {}
                     });
                 } else {
@@ -422,6 +406,66 @@ export default class PromptRunner {
             }
         }
+        // 3b2. Base image selection — AFTER instance type resolved (US-1 ordering constraint)
+        // Pass resolved instanceType and tensorParallelSize for driver-aware filtering
+        let resolvedInstanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
+        let resolvedTensorParallelSize = this._autoTensorParallelism || 1;
+        // For existing endpoints: resolve instance type from the endpoint (US-1 ordering constraint)
+        // The instance type is needed for driver-aware base image filtering even though the user
+        // doesn't select it manually. Pattern reused from do/lib/resolve-instance.sh.
+        if (!resolvedInstanceType && existingEndpointAnswers.existingEndpointName) {
+            const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
+            resolvedInstanceType = await this.mcpQueryRunner._resolveEndpointInstanceType(
+                existingEndpointAnswers.existingEndpointName,
+                resolvedRegion
+            );
+            // Store resolved instance type for downstream use (IC config, GPU count derivation)
+            if (resolvedInstanceType) {
+                existingEndpointAnswers._resolvedEndpointInstanceType = resolvedInstanceType;
+                // Propagate as instanceType so template-variable-resolver derives
+                // icGpuCount and tensorParallelSize from the instance catalog.
+                // Without this, IC_GPU_COUNT defaults to 1 even for multi-GPU instances.
+                existingEndpointAnswers.instanceType = resolvedInstanceType;
+                // Derive GPU count from instance catalog for immediate use (TP for base image filtering)
+                const endpointInstanceEntry = instanceCatalogRaw[resolvedInstanceType];
+                if (endpointInstanceEntry?.gpus && endpointInstanceEntry.gpus > 1) {
+                    existingEndpointAnswers.gpuCount = endpointInstanceEntry.gpus;
+                    existingEndpointAnswers.tensorParallelSize = endpointInstanceEntry.gpus;
+                    this._autoTensorParallelism = endpointInstanceEntry.gpus;
+                    this._autoGpuCount = endpointInstanceEntry.gpus;
+                    console.log(`   ✓ Endpoint instance ${resolvedInstanceType}: ${endpointInstanceEntry.gpus} GPUs (TP=${endpointInstanceEntry.gpus})`);
+                }
+            }
+        }
+        // Re-read tensor parallel size after potential endpoint resolution update
+        resolvedTensorParallelSize = this._autoTensorParallelism || 1;
+        await this.mcpQueryRunner._queryMcpForBaseImage(frameworkAnswers, explicitConfig, {
+            instanceType: resolvedInstanceType,
+            tensorParallelSize: resolvedTensorParallelSize,
+            modelId: phase1ModelId || undefined
+        });
+        const baseImagePreviousAnswers = {
+            ...frameworkAnswers,
+            ...engineAnswers,
+            ...(this._mcpBaseImageChoices ? { _mcpBaseImageChoices: this._mcpBaseImageChoices } : {})
+        };
+        const baseImageAnswers = await this._runPhase(
+            baseImagePrompts,
+            baseImagePreviousAnswers,
+            explicitConfig,
+            existingConfig
+        );
+        // Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
+        this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
+        // Extract CUDA version from selected base image for CUDA/AMI auto-resolution
+        const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
         // 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
         let asyncAnswers = {};
         if (regionAndTargetAnswers.deploymentTarget === 'async-inference') {

package/src/lib/template-variable-resolver.js CHANGED Viewed

@@ -454,6 +454,14 @@ export async function _ensureTemplateVariables(answers, registryConfigManager =
                 answers.tensorParallelSize = instanceGpuCount;
                 answers._tpAutoResolved = true;
                 answers._tpAutoResolvedFrom = answers.instanceType;
+                // Also propagate to icEnvVars so IC_ENV_VLLM_TENSOR_PARALLEL_SIZE
+                // (or equivalent) is written in do/config for deploy-time IC creation.
+                if (!answers.icEnvVars) {
+                    answers.icEnvVars = {};
+                }
+                answers.icEnvVars[tpEnvKey] = String(instanceGpuCount);
                 console.log(`    ℹ️  TP degree: ${instanceGpuCount} (auto-detected from ${answers.instanceType})`);
             }
         }