npm - @aws/ml-container-creator - Versions diffs - 0.13.4 → 0.15.0 - Mend

@aws/ml-container-creator 0.13.4 → 0.15.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (43)

package/README.md +23 -5
package/config/parameter-schema-v2.json +32 -4
package/infra/ci-harness/lib/ci-harness-stack.ts +13 -5
package/infra/ci-harness/package-lock.json +122 -116
package/infra/ci-harness/package.json +1 -1
package/package.json +5 -3
package/pyproject.toml +21 -0
package/requirements.txt +19 -0
package/servers/instance-sizer/index.js +72 -4
package/servers/instance-sizer/lib/model-resolver.js +28 -2
package/src/app.js +17 -0
package/src/lib/bootstrap-command-handler.js +33 -23
package/src/lib/config-loader.js +18 -0
package/src/lib/config-manager.js +6 -1
package/src/lib/dataset-slug.js +152 -0
package/src/lib/generated/cli-options.js +9 -3
package/src/lib/generated/parameter-matrix.js +14 -3
package/src/lib/generated/validation-rules.js +1 -1
package/src/lib/mcp-query-runner.js +6 -0
package/src/lib/prompt-runner.js +5 -0
package/src/lib/prompts/feature-prompts.js +1 -1
package/src/lib/template-manager.js +0 -7
package/src/lib/template-variable-resolver.js +51 -1
package/src/lib/tune-config-state.js +14 -1
package/templates/do/.adapter_helper.py +451 -0
package/templates/do/.benchmark_writer.py +22 -0
package/templates/do/.register_helper.py +1163 -0
package/templates/do/.stage_helper.py +419 -0
package/templates/do/.tune_helper.py +379 -65
package/templates/do/__pycache__/.adapter_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.register_helper.cpython-312.pyc +0 -0
package/templates/do/__pycache__/.tune_helper.cpython-312.pyc +0 -0
package/templates/do/adapter +427 -27
package/templates/do/add-ic +85 -3
package/templates/do/benchmark +173 -15
package/templates/do/config +24 -0
package/templates/do/lib/inference-component.sh +56 -3
package/templates/do/lib/profile.sh +5 -0
package/templates/do/register +552 -6
package/templates/do/stage +91 -272
package/templates/do/test +12 -2
package/templates/do/tune +264 -12

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "0.13.4",
+  "version": "0.15.0",
   "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
   "main": "src/index.js",
   "bin": {
@@ -76,7 +76,9 @@
     "README.md",
     "LICENSE",
     "LICENSE-THIRD-PARTY",
-    "NOTICE"
+    "NOTICE",
+    "requirements.txt",
+    "pyproject.toml"
   ],
   "type": "module",
   "license": "Apache-2.0",
@@ -127,7 +129,7 @@
     "lint-staged": "^17.0.7",
     "mocha": "^10.2.0",
     "npm-force-resolutions": "^0.0.10",
-    "nyc": "^15.1.0",
+    "nyc": "^18.0.0",
     "sbom": "^0.0.0"
   },
   "lint-staged": {

package/pyproject.toml ADDED Viewed

@@ -0,0 +1,21 @@
+[project]
+name = "ml-container-creator"
+version = "0.13.4"
+description = "Python dependencies for ml-container-creator do/ lifecycle scripts"
+requires-python = ">=3.10"
+dependencies = [
+    "boto3>=1.35.0",
+    "huggingface-hub>=0.25.0",
+    "hf-transfer>=0.1.8",
+    "pyarrow>=17.0.0",
+    "sagemaker-core>=1.0.0",
+    "sagemaker[train]>=3.0.0",
+    "sagemaker[serve]>=3.0.0",
+    "packaging>=24.0",
+    "pyyaml>=6.0",
+]
+[dependency-groups]
+dev = [
+    "pytest>=8.0",
+]

package/requirements.txt ADDED Viewed

@@ -0,0 +1,19 @@
+# Python dependencies for do/ lifecycle scripts
+#
+# Install with uv (recommended):
+#   uv pip install -r requirements.txt
+#
+# Or with pip:
+#   pip install -r requirements.txt
+#
+# Source of truth: pyproject.toml
+boto3>=1.35.0
+huggingface_hub>=0.25.0
+hf_transfer>=0.1.8
+pyarrow>=17.0.0
+sagemaker-core>=1.0.0
+sagemaker[train]>=3.0.0
+sagemaker[serve]>=3.0.0
+packaging>=24.0
+PyYAML>=6.0

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -25,8 +25,8 @@ import { readFileSync } from 'node:fs';
 import { fileURLToPath } from 'node:url';
 import { resolve, dirname } from 'node:path';
 import { resolveModelMetadata } from './lib/model-resolver.js';
-import { estimateVram } from './lib/vram-estimator.js';
-import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
+import { estimateVram, computeMaxModelLen } from './lib/vram-estimator.js';
+import { filterAndRankInstances, applyAvailabilityRanking, getPerGpuMemoryGb } from './lib/instance-ranker.js';
 import { QuotaResolver } from './lib/quota-resolver.js';
 import { queryBedrock } from '../lib/bedrock-client.js';
@@ -393,6 +393,66 @@ async function handleGetInstanceRecommendation(params) {
         { limit }
     );
+    // Step 3-max_model_len: When no instance fits at full context, try capping context length
+    // NFR-1 guard: skip this logic for models with recommendedInstances in catalog
+    let suggestedMaxModelLen = null;
+    let contextLengthCapped = false;
+    let originalMaxPositionEmbeddings = null;
+    if (recommendations.length === 0 && !modelMetadata.recommendedInstances && modelMetadata.maxPositionEmbeddings) {
+        // Find the largest available GPU instance
+        const gpuInstances = Object.entries(effectiveCatalog)
+            .filter(([, meta]) => meta.category === 'gpu' && meta.gpus > 0)
+            .map(([name, meta]) => {
+                const perGpu = getPerGpuMemoryGb(meta);
+                return { name, meta, totalVramGb: perGpu ? perGpu * meta.gpus : 0 };
+            })
+            .filter(i => i.totalVramGb > 0)
+            .sort((a, b) => b.totalVramGb - a.totalVramGb);
+        if (gpuInstances.length > 0) {
+            const bestInstance = gpuInstances[0];
+            // Compute model weight memory for computeMaxModelLen
+            const weightsGb = vramEstimate.breakdown.weightsGb;
+            const safeLen = computeMaxModelLen({
+                modelWeightGb: weightsGb,
+                totalGpuMemoryGb: bestInstance.meta.gpuMemoryGb || (bestInstance.totalVramGb / bestInstance.meta.gpus),
+                gpuCount: bestInstance.meta.gpus,
+                numLayers: modelMetadata.numLayers,
+                numKvHeads: modelMetadata.numKvHeads,
+                headDim: modelMetadata.headDim
+            });
+            if (safeLen && safeLen.maxModelLen >= 2048) {
+                // Re-estimate VRAM with capped sequence length
+                const cappedEstimate = estimateVram({
+                    parameterCount: modelMetadata.parameterCount,
+                    dtype: modelMetadata.dtype,
+                    quantization: quantization || undefined,
+                    maxSequenceLength: safeLen.maxModelLen,
+                    batchSize: effectiveBatchSize || undefined
+                });
+                // Re-filter instances with the reduced VRAM requirement
+                recommendations = filterAndRankInstances(
+                    cappedEstimate.vramGb,
+                    effectiveCatalog,
+                    { limit }
+                );
+                suggestedMaxModelLen = safeLen.maxModelLen;
+                contextLengthCapped = true;
+                originalMaxPositionEmbeddings = modelMetadata.maxPositionEmbeddings;
+                log(`Context capped: ${modelMetadata.maxPositionEmbeddings} → ${safeLen.maxModelLen} for ${modelName}`);
+            } else {
+                // AC-1.6: safeLen < 2048 or null — recommend larger instance instead
+                log(`Model ${modelName} cannot fit 2048 context on ${bestInstance.name}, recommending larger instance`);
+            }
+        }
+    }
     // Step 3a: Quota & availability filtering (discover mode only)
     let preQuotaFilterCount = 0;
     let allFilteredByQuota = false;
@@ -521,7 +581,10 @@ async function handleGetInstanceRecommendation(params) {
         content: [{
             type: 'text',
             text: JSON.stringify({
-                values: { instanceType: topRecommendation },
+                values: {
+                    instanceType: topRecommendation,
+                    ...(suggestedMaxModelLen ? { maxModelLen: suggestedMaxModelLen } : {})
+                },
                 choices: { instanceType: rankedList },
                 metadata: {
                     modelName,
@@ -533,7 +596,12 @@ async function handleGetInstanceRecommendation(params) {
                     recommendations: finalRecommendations,
                     source: modelMetadata.source,
                     smartModeUsed,
-                    allFilteredByQuota
+                    allFilteredByQuota,
+                    ...(contextLengthCapped ? {
+                        suggestedMaxModelLen,
+                        contextLengthCapped: true,
+                        originalMaxPositionEmbeddings
+                    } : {})
                 }
             })
         }]

package/servers/instance-sizer/lib/model-resolver.js CHANGED Viewed

@@ -142,13 +142,27 @@ export function extractFromHuggingFaceConfig(config) {
     const architecture = (config.architectures && config.architectures[0]) || 'unknown';
     const maxPositionEmbeddings = config.max_position_embeddings || 4096;
-    return {
+    // Extract architecture params for KV cache computation (computeMaxModelLen)
+    const numLayers = config.num_hidden_layers || null;
+    const numKvHeads = config.num_key_value_heads || config.num_attention_heads || null;
+    const headDim = config.head_dim || (config.hidden_size && config.num_attention_heads
+        ? Math.floor(config.hidden_size / config.num_attention_heads)
+        : null);
+    const result = {
         parameterCount,
         dtype,
         architecture,
         maxPositionEmbeddings,
         source: 'huggingface_api'
     };
+    // Only include architecture params if available (graceful degradation)
+    if (numLayers) result.numLayers = numLayers;
+    if (numKvHeads) result.numKvHeads = numKvHeads;
+    if (headDim) result.headDim = headDim;
+    return result;
 }
 /**
@@ -175,13 +189,25 @@ export async function resolveModelMetadata(modelName, options = {}) {
     const catalogEntry = catalogLookup(modelName, catalog);
     if (catalogEntry) {
-        return {
+        const result = {
             parameterCount: catalogEntry.parameterCount,
             dtype: catalogEntry.defaultDtype || 'float16',
             architecture: catalogEntry.architecture || 'unknown',
             maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings || 4096,
             source: 'catalog'
         };
+        // Pass through recommendedInstances for NFR-1 guard
+        if (catalogEntry.recommendedInstances) {
+            result.recommendedInstances = catalogEntry.recommendedInstances;
+        }
+        // Pass through architecture params if available in catalog
+        if (catalogEntry.numLayers) result.numLayers = catalogEntry.numLayers;
+        if (catalogEntry.numKvHeads) result.numKvHeads = catalogEntry.numKvHeads;
+        if (catalogEntry.headDim) result.headDim = catalogEntry.headDim;
+        return result;
     }
     // Step 2: If discover mode, try HuggingFace Hub

package/src/app.js CHANGED Viewed

@@ -400,6 +400,9 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         ignorePatterns.push('**/do/adapters/**');
         ignorePatterns.push('**/do/tune');
         ignorePatterns.push('**/do/.tune_helper.py');
+        ignorePatterns.push('**/do/.stage_helper.py');
+        ignorePatterns.push('**/do/.adapter_helper.py');
+        ignorePatterns.push('**/do/.register_helper.py');
         ignorePatterns.push('**/do/train');
         ignorePatterns.push('**/do/.train_build_request.py');
         ignorePatterns.push('**/do/.train_status_parser.py');
@@ -576,6 +579,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
             fs.writeFileSync(gitignorePath, mlccIgnore);
         }
     }
+    // Add __pycache__/ and *.pyc to .gitignore (Python helpers leave bytecode behind)
+    {
+        const gitignorePath = path.join(destDir, '.gitignore');
+        const pycacheIgnore = '# Python bytecode (generated by do/ helper scripts)\n__pycache__/\n*.pyc\n';
+        if (fs.existsSync(gitignorePath)) {
+            const existing = fs.readFileSync(gitignorePath, 'utf8');
+            if (!existing.includes('__pycache__')) {
+                fs.appendFileSync(gitignorePath, `\n${pycacheIgnore}`);
+            }
+        } else {
+            fs.writeFileSync(gitignorePath, pycacheIgnore);
+        }
+    }
 }
 /**

package/src/lib/bootstrap-command-handler.js CHANGED Viewed

@@ -459,39 +459,49 @@ export default class BootstrapCommandHandler {
                     // --no-rollback prevents rollback on AlreadyExists errors for IAM roles
                     // that may pre-exist from a prior deployment or another region.
-                    // Check if benchmark bucket already exists (from a prior torn-down stack with RETAIN policy)
-                    let importBucketCtx = '';
+                    // Check if benchmark results bucket already exists.
+                    // If it does, skip CDK deploy for benchmark infra — just update the profile.
+                    let benchmarkBucketExists = false;
                     if (options.benchmarkInfra) {
+                        const resultsBucketName = `mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}`;
                         try {
                             execSync(
-                                `aws s3api head-bucket --bucket mlcc-benchmark-results-${profileData.accountId}-${profileData.awsRegion}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
+                                `aws s3api head-bucket --bucket ${resultsBucketName}${profileData.awsProfile ? ` --profile ${profileData.awsProfile}` : ''} --region ${profileData.awsRegion}`,
                                 { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }
                             );
-                            importBucketCtx = ' -c importExistingBenchmarkBucket=true';
-                            console.log('  ℹ️  Benchmark results bucket already exists — importing into stack');
+                            benchmarkBucketExists = true;
+                            console.log(`  ✅ Benchmark results bucket already exists: ${resultsBucketName}`);
+                            console.log('     Skipping CDK deploy for benchmark infra — updating profile only.');
+                            profileData.benchmarkInfraProvisioned = true;
+                            profileData.ciGlueDatabase = profileData.ciGlueDatabase || 'mlcc_ci';
+                            profileData.ciBenchmarkResultsBucket = resultsBucketName;
                         } catch {
                             // Bucket doesn't exist — will be created fresh
                         }
                     }
-                    const cdkDeployCmd = options.benchmarkInfra
-                        ? `npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true${importBucketCtx}`
-                        : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
-                    execSync(
-                        cdkDeployCmd,
-                        {
-                            cwd: ciHarnessDir,
-                            encoding: 'utf8',
-                            stdio: 'inherit',
-                            env: {
-                                ...process.env,
-                                AWS_REGION: profileData.awsRegion,
-                                CDK_DEFAULT_REGION: profileData.awsRegion,
-                                CDK_DEFAULT_ACCOUNT: profileData.accountId,
-                                AWS_PROFILE: profileData.awsProfile
+                    // Only run CDK deploy if we actually need to create infrastructure
+                    if (!benchmarkBucketExists || !options.benchmarkInfra) {
+                        const cdkDeployCmd = options.benchmarkInfra
+                            ? 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback --parameters MlccCiHarnessStack:CreateBenchmarkInfra=true'
+                            : 'npx cdk deploy MlccCiHarnessStack --require-approval never --no-rollback';
+                        execSync(
+                            cdkDeployCmd,
+                            {
+                                cwd: ciHarnessDir,
+                                encoding: 'utf8',
+                                stdio: 'inherit',
+                                env: {
+                                    ...process.env,
+                                    AWS_REGION: profileData.awsRegion,
+                                    CDK_DEFAULT_REGION: profileData.awsRegion,
+                                    CDK_DEFAULT_ACCOUNT: profileData.accountId,
+                                    AWS_PROFILE: profileData.awsProfile
+                                }
                             }
-                        }
-                    );
-                    console.log('  ✅ CI harness stack deployed');
+                        );
+                        console.log('  ✅ CI harness stack deployed');
+                    }
                     profileData.ciInfraProvisioned = true;
                     profileData.ciTableName = 'mlcc-ci-table';

package/src/lib/config-loader.js CHANGED Viewed

@@ -265,6 +265,21 @@ export default class ConfigLoader {
                 return;
             }
+            // Handle icEnvVars object (deploy-time IC environment variables)
+            if (key === 'icEnvVars' && typeof value === 'object' && value !== null) {
+                if (!this.manager.config.icEnvVars) {
+                    this.manager.config.icEnvVars = {};
+                }
+                const cliIcEnvVars = (this.manager.explicitConfig && this.manager.explicitConfig.icEnvVars) || {};
+                Object.entries(value).forEach(([envKey, envValue]) => {
+                    if (!(envKey in cliIcEnvVars)) {
+                        this.manager.config.icEnvVars[envKey] = envValue;
+                        this.manager._recordSource(`icEnvVars.${envKey}`, envValue, 'config-file');
+                    }
+                });
+                return;
+            }
             if (this.manager._isSourceSupported(key, 'configFile')) {
                 filteredConfig[key] = this.manager._parseValue(key, value);
                 this.manager._recordSource(key, this.manager._parseValue(key, value), 'config-file');
@@ -342,6 +357,9 @@ export default class ConfigLoader {
         // Parse --server-env KEY=VALUE pairs
         this._parseEnvVarOptions('server-env', 'serverEnvVars');
+        // Parse --ic-env KEY=VALUE pairs (deploy-time IC environment variables)
+        this._parseEnvVarOptions('ic-env', 'icEnvVars');
     }
     /**

package/src/lib/config-manager.js CHANGED Viewed

@@ -183,6 +183,9 @@ export default class ConfigManager {
         if (this.config.serverEnvVars && typeof this.config.serverEnvVars === 'object') {
             finalConfig.serverEnvVars = { ...this.config.serverEnvVars };
         }
+        if (this.config.icEnvVars && typeof this.config.icEnvVars === 'object') {
+            finalConfig.icEnvVars = { ...this.config.icEnvVars };
+        }
         // Ensure all parameters from the matrix are included in final config
         // This is important for optional parameters that might be null
@@ -411,7 +414,8 @@ export default class ConfigManager {
             ...endpointParams,
             ...icParams,
             'modelEnvVars',
-            'serverEnvVars'
+            'serverEnvVars',
+            'icEnvVars'
         ]);
         const core = {};
         for (const [key, value] of Object.entries(this.config)) {
@@ -426,6 +430,7 @@ export default class ConfigManager {
             icConfig,
             modelEnvVars: { ...(this.config.modelEnvVars || {}) },
             serverEnvVars: { ...(this.config.serverEnvVars || {}) },
+            icEnvVars: { ...(this.config.icEnvVars || {}) },
             manifest: [...this._sourceManifest]
         };
     }

package/src/lib/dataset-slug.js ADDED Viewed

@@ -0,0 +1,152 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Dataset Slug Derivation
+ *
+ * Derives a deterministic, short slug from a dataset URI for use in
+ * tuning-job-aware adapter naming conventions.
+ *
+ * Slugification rules:
+ * - Lowercase
+ * - Strip non-alphanumeric characters (keep hyphens)
+ * - Truncate to 20 characters
+ * - Replace consecutive hyphens with single hyphen
+ * - Strip leading/trailing hyphens
+ *
+ * Examples:
+ *   hf://org/name           -> "name"
+ *   hf://tatsu-lab/alpaca   -> "alpaca"
+ *   hf://Open-Orca/OpenOrca -> "openorca"
+ *   s3://bucket/path/file.jsonl -> "file"
+ *
+ * Requirements: US-4 (AC-4.2)
+ */
+/**
+ * Derive a dataset slug from a dataset URI.
+ *
+ * @param {string} datasetUri - Dataset URI (s3://... or hf://...)
+ * @returns {string} The derived slug, or empty string if extraction fails
+ */
+export function deriveDatasetSlug(datasetUri) {
+    if (!datasetUri || typeof datasetUri !== 'string') {
+        return '';
+    }
+    let rawName = '';
+    if (datasetUri.startsWith('hf://')) {
+        // hf://org/name[/split][?file=pattern]
+        // Extract the dataset name (second path component)
+        const hfPath = datasetUri.slice(5); // remove "hf://"
+        const withoutQuery = hfPath.split('?')[0]; // remove ?file=...
+        const parts = withoutQuery.split('/');
+        // parts[0] = org, parts[1] = name, parts[2+] = split
+        rawName = parts[1] || parts[0] || '';
+    } else if (datasetUri.startsWith('s3://')) {
+        // s3://bucket/path/file.jsonl -> slug from filename (without extension)
+        const s3Path = datasetUri.slice(5); // remove "s3://"
+        const parts = s3Path.split('/');
+        const filename = parts[parts.length - 1] || '';
+        // Remove file extension
+        const dotIndex = filename.lastIndexOf('.');
+        rawName = dotIndex > 0 ? filename.substring(0, dotIndex) : filename;
+    } else {
+        // Unknown format — try to extract last path component
+        const parts = datasetUri.split('/');
+        rawName = parts[parts.length - 1] || '';
+    }
+    return slugify(rawName);
+}
+/**
+ * Apply slugification rules to a raw name.
+ *
+ * @param {string} raw - Raw name to slugify
+ * @returns {string} Slugified string
+ */
+export function slugify(raw) {
+    if (!raw) return '';
+    let slug = raw
+        .toLowerCase()                      // lowercase
+        .replace(/[^a-z0-9-]/g, '')         // strip non-alphanumeric (keep hyphens)
+        .replace(/-{2,}/g, '-')             // replace consecutive hyphens
+        .replace(/^-+/, '')                 // strip leading hyphens
+        .replace(/-+$/, '');                // strip trailing hyphens
+    // Truncate to 20 chars
+    if (slug.length > 20) {
+        slug = slug.substring(0, 20);
+        // Don't end on a hyphen after truncation
+        slug = slug.replace(/-+$/, '');
+    }
+    return slug;
+}
+/**
+ * Resolve a --from-tune argument to the appropriate config variable name.
+ *
+ * Resolution rules:
+ * - No arg (empty/null) -> TUNE_OUTPUT_PATH_LATEST
+ * - technique only (e.g., "sft") -> TUNE_ADAPTER_PATH_SFT
+ * - technique-dataset compound (e.g., "sft-alpaca") -> TUNE_ADAPTER_PATH_SFT_ALPACA
+ *
+ * @param {string} fromTuneArg - The --from-tune argument value
+ * @param {function} configVarExists - Function that checks if a config var exists
+ * @returns {{ varName: string, technique: string, slug: string, isCompound: boolean, fallback: string|null }}
+ */
+export function resolveFromTuneVar(fromTuneArg, configVarExists) {
+    if (!fromTuneArg) {
+        return {
+            varName: 'TUNE_OUTPUT_PATH_LATEST',
+            technique: '',
+            slug: '',
+            isCompound: false,
+            fallback: null
+        };
+    }
+    const upper = fromTuneArg.toUpperCase();
+    // Check if argument contains a hyphen — potential compound key
+    const hyphenIndex = fromTuneArg.indexOf('-');
+    if (hyphenIndex > 0) {
+        const technique = fromTuneArg.substring(0, hyphenIndex);
+        const slug = fromTuneArg.substring(hyphenIndex + 1);
+        const techniqueUpper = technique.toUpperCase();
+        const slugUpper = slug.toUpperCase().replace(/-/g, '_');
+        const compoundVar = `TUNE_ADAPTER_PATH_${techniqueUpper}_${slugUpper}`;
+        if (configVarExists(compoundVar)) {
+            return {
+                varName: compoundVar,
+                technique,
+                slug,
+                isCompound: true,
+                fallback: null
+            };
+        }
+        // Compound key doesn't exist — fallback to technique-only
+        return {
+            varName: `TUNE_ADAPTER_PATH_${techniqueUpper}`,
+            technique,
+            slug,
+            isCompound: false,
+            fallback: compoundVar // the compound var that was tried but didn't exist
+        };
+    }
+    // No hyphen — technique-only
+    return {
+        varName: `TUNE_ADAPTER_PATH_${upper}`,
+        technique: fromTuneArg,
+        slug: '',
+        isCompound: false,
+        fallback: null
+    };
+}

package/src/lib/generated/cli-options.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-15T20:16:03.840Z
+// Generated: 2026-06-22T13:49:00.815Z
 /**
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -70,7 +70,7 @@ export const cliOptions = [
     {
         'flag': '--enable-lora',
         'description': 'Enable LoRA adapter serving',
-        'defaultValue': false
+        'defaultValue': true
     },
     {
         'flag': '--max-loras <n>',
@@ -85,7 +85,7 @@ export const cliOptions = [
     {
         'flag': '--include-benchmark',
         'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
-        'defaultValue': false
+        'defaultValue': true
     },
     {
         'flag': '--benchmark-concurrency <n>',
@@ -353,6 +353,11 @@ export const cliOptions = [
         'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
         'repeatable': true
     },
+    {
+        'flag': '--ic-env <KEY=VALUE>',
+        'description': 'Deploy-time environment variable for inference components (IC_ENV_* prefix), repeatable (e.g. VLLM_MAX_MODEL_LEN=8192)',
+        'repeatable': true
+    },
     {
         'flag': '--include-sample',
         'description': 'Include sample model code',
@@ -464,6 +469,7 @@ export const helpGroups = {
     '--fsx-volume-handle': 'hyperpod',
     '--model-env': 'env',
     '--server-env': 'env',
+    '--ic-env': 'ic',
     '--include-sample': 'features',
     '--include-testing': 'features',
     '--test-types': 'features',

package/src/lib/generated/parameter-matrix.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-15T20:16:03.952Z
+// Generated: 2026-06-22T13:49:00.924Z
 /**
  * Parameter matrix defining how each parameter is loaded from various sources.
@@ -106,7 +106,7 @@ export const parameterMatrix = {
         'mcp': false,
         'promptable': true,
         'required': false,
-        'default': false,
+        'default': true,
         'valueSpace': 'bounded'
     },
     'maxLoras': {
@@ -139,7 +139,7 @@ export const parameterMatrix = {
         'mcp': false,
         'promptable': true,
         'required': false,
-        'default': false,
+        'default': true,
         'valueSpace': 'bounded'
     },
     'benchmarkConcurrency': {
@@ -569,6 +569,17 @@ export const parameterMatrix = {
         'default': null,
         'valueSpace': 'unbounded'
     },
+    'icEnv': {
+        'cliOption': 'ic-env',
+        'envVar': null,
+        'configFile': true,
+        'packageJson': false,
+        'mcp': false,
+        'promptable': false,
+        'required': false,
+        'default': [],
+        'valueSpace': 'unbounded'
+    },
     'includeSampleModel': {
         'cliOption': 'include-sample',
         'envVar': 'ML_INCLUDE_SAMPLE',

package/src/lib/generated/validation-rules.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-15T20:16:03.877Z
+// Generated: 2026-06-22T13:49:00.849Z
 /**
  * Validation rules derived from parameter-schema-v2.json.

package/src/lib/mcp-query-runner.js CHANGED Viewed

@@ -216,6 +216,12 @@ export default class McpQueryRunner {
                 if (parsed.choices?.instanceType?.length > 0) {
                     this.runner._instanceSizerMetadata = parsed.metadata || null;
+                    // Store maxModelLen from sizer if context was capped (AC-1.7)
+                    if (parsed.values?.maxModelLen) {
+                        this.runner._sizerMaxModelLen = parsed.values.maxModelLen;
+                        console.log(`   ✓ Context length capped: max_model_len=${parsed.values.maxModelLen}`);
+                    }
                     // Build display labels with VRAM estimate and utilization percentage
                     const recommendations = parsed.metadata?.recommendations || [];
                     const estimatedVramGb = parsed.metadata?.estimatedVramGb;